#### NOAAA Günlük Havadurumu verisi işleme

NOAA günlük hava durumu/sıcaklık verisini pandas dataframe olarak işleyen python notebook'u.<br>
Günlük havadurumu verisine ftp://ftp.ncdc.noaa.gov/pub/data/gsod/ adresinden ulaşılabilir.

In [1]:
import os
import numpy as np
import pandas as pd

def display_all(df):
    with pd.option_context("display.max_rows", 1000, "display.max_columns", 1000): 
        display(df)
        
def fahrenheit_to_celsius(df, col_name):
    df[col_name] = (df[col_name] - 32) * 5/9

In [2]:
filename = './data/170630-99999-2020.op'

with open(filename) as file:
    all_lines = file.readlines()
    
display(all_lines[0:3])

['STN--- WBAN   YEARMODA    TEMP       DEWP      SLP        STP       VISIB      WDSP     MXSPD   GUST    MAX     MIN   PRCP   SNDP   FRSHTT\n',
 '170630 99999  20200101    43.1 24    35.7 24  9999.9  0  9999.9  0    5.8 24    7.7 24   17.1   27.0    48.2*   39.2*  0.00G 999.9  010000\n',
 '170630 99999  20200102    40.8 24    37.3 24  9999.9  0  9999.9  0    6.5 24   10.3 24   15.0   27.0    46.4*   35.6*  0.49G   0.4  010000\n']

In [3]:
all_lines_parsed = [line.split() for line in all_lines]

header = ['STN', 'WBAN', 'YEARMODA', 'TEMP', 'TEMP_INFO', 'DEWP', 'DEWP_INFO', 'SLP', 'SLP_INFO', 'STP', 'STP_INFO', 'VISIB', 'VISIB_INFO', 'WDSP', 'WDSP_INFO', 'MXSPD', 'GUST', 'MAX', 'MIN', 'PRCP', 'SNDP', 'FRSHTT']
data = all_lines_parsed[1:]

df_havadurumu = pd.DataFrame(data, columns=header)
df_havadurumu.replace('999.9', np.NaN, inplace=True)

In [4]:
# Cleaning Minimum and Maximum Temp
df_havadurumu['MAX'] = df_havadurumu['MAX'].map(lambda x: x.rstrip('*'))
df_havadurumu['MIN'] = df_havadurumu['MIN'].map(lambda x: x.rstrip('*'))
df_havadurumu['PRCP'] = df_havadurumu['PRCP'].map(lambda x: x.rstrip('AEFIGH'))

numerical_columns = ['TEMP', 'DEWP', 'SLP', 'STP', 'VISIB', 'WDSP', 'MXSPD', 'GUST', 'MAX', 'MIN','PRCP', 'SNDP']

df_havadurumu[numerical_columns] = df_havadurumu[numerical_columns].apply(pd.to_numeric, errors='coerce')

columns = ['TEMP', 'MAX', 'MIN']
_ = [fahrenheit_to_celsius(df_havadurumu, column) for column in columns]

df_havadurumu['DATE'] = pd.to_datetime(df_havadurumu['YEARMODA'], format='%Y%m%d')
df_havadurumu.drop(['YEARMODA'], axis=1, inplace=True)
df_havadurumu.set_index(['DATE'], inplace=True)

In [5]:
df_havadurumu.head()

Unnamed: 0_level_0,STN,WBAN,TEMP,TEMP_INFO,DEWP,DEWP_INFO,SLP,SLP_INFO,STP,STP_INFO,...,VISIB_INFO,WDSP,WDSP_INFO,MXSPD,GUST,MAX,MIN,PRCP,SNDP,FRSHTT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,170630,99999,6.166667,24,35.7,24,9999.9,0,9999.9,0,...,24,7.7,24,17.1,27.0,9.0,4.0,0.0,,10000
2020-01-02,170630,99999,4.888889,24,37.3,24,9999.9,0,9999.9,0,...,24,10.3,24,15.0,27.0,8.0,2.0,0.49,0.4,10000
2020-01-03,170630,99999,5.611111,24,35.7,24,9999.9,0,9999.9,0,...,24,6.5,24,11.1,18.1,9.0,2.0,0.19,,0
2020-01-04,170630,99999,6.777778,24,35.9,24,9999.9,0,9999.9,0,...,24,6.1,24,13.0,21.0,9.0,3.0,0.0,,0
2020-01-05,170630,99999,5.722222,24,36.1,24,9999.9,0,9999.9,0,...,24,6.1,24,19.0,26.0,10.0,1.777778,0.08,,10000


In [6]:
COLUMNS_TO_KEEP = ['TEMP', 'MAX', 'MIN', 'PRCP', 'SNDP']
df_havadurumu.loc[:,COLUMNS_TO_KEEP].head()

Unnamed: 0_level_0,TEMP,MAX,MIN,PRCP,SNDP
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01,6.166667,9.0,4.0,0.0,
2020-01-02,4.888889,8.0,2.0,0.49,0.4
2020-01-03,5.611111,9.0,2.0,0.19,
2020-01-04,6.777778,9.0,3.0,0.0,
2020-01-05,5.722222,10.0,1.777778,0.08,
