In [1]:
import pandas as pd
import os

In [2]:
# Input folder: every visible regular file in it is read by the conversion loop.
in_dir = 'Data/Weather/Stations-Observations Merged/'

# Output folder: write_to_file() saves the converted CSV files here.
out_dir = 'Data/Weather/Stations-Observations Merged-Converted/'
# exist_ok=True creates the folder (and any missing parents) when needed and is
# a no-op when it already exists, so this cell can be re-run safely and there
# is no gap between checking for the folder and creating it.
os.makedirs(out_dir, exist_ok=True)

In [3]:
# Filter the data frame down to only the observation types that we want
def filter_conditions(df):
    """Remove, in place, every row whose ELEMENT is not a wanted observation type.

    The frame is modified in place (nothing is returned) because the caller
    invokes this function for its side effect and ignores the return value.
    Rebinding the local name ``df`` to a filtered copy would leave the
    caller's frame untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``ELEMENT`` column. Rows are removed by index label,
        so the index is assumed to be unique (true for the default index
        produced by ``pd.read_csv``).

    Raises
    ------
    KeyError
        If ``df`` has no ``ELEMENT`` column.
    """
    wanted_elements = [
        'PRCP',  # Precipitation (tenths of mm)
        'SNOW',  # Snowfall (mm)
        'SNWD',  # Snow depth (mm)
        'TMAX',  # Maximum temperature (tenths of degrees C)
        'TMIN',  # Minimum temperature (tenths of degrees C)
        'AWND',  # Average wind speed (tenths of meters per second)
        'TAVG',  # Average temperature (tenths of degrees C)
    ]
    # Missing ELEMENT values never match, so those rows are removed as well.
    unwanted = ~df['ELEMENT'].isin(wanted_elements)
    df.drop(index=df.index[unwanted], inplace=True)
    

In [4]:
def drop_columns(df):
    """Delete the flag and observation-time columns from ``df`` in place.

    Removes ``M_FLAG``, ``Q_FLAG``, ``S_FLAG`` and ``OBS_TIME``. Nothing is
    returned; pandas raises ``KeyError`` if any of the four is absent.
    """
    for_removal = ['M_FLAG', 'Q_FLAG', 'S_FLAG', 'OBS_TIME']
    df.drop(columns=for_removal, inplace=True)

In [5]:
def tc_to_f(x):
    """Convert a temperature in tenths of degrees Celsius to Fahrenheit.

    The input is first scaled from tenths of a degree to whole degrees
    Celsius, then the usual C -> F formula is applied.
    """
    celsius = x / 10
    return celsius * 9 / 5 + 32

In [6]:
def mm_to_inches(x):
    """Convert a length in millimetres (not tenths of mm) to inches."""
    inches_per_mm = 0.0393701
    return x * inches_per_mm

In [7]:
def tmm_to_inches(x):
    """Convert a length in tenths of a millimetre to inches."""
    millimetres = x / 10
    return millimetres * 0.0393701

In [8]:
def mps_to_mph(x):
    """Convert a speed in tenths of metres per second to miles per hour."""
    metres_per_second = x / 10
    return metres_per_second * 2.23694

In [9]:
def convert(df):
    """Convert ``DATA_VALUE`` in place to inches, degrees Fahrenheit or mph.

    Each row is converted according to its ``ELEMENT`` code; rows with any
    other code keep their value (as a float). Nothing is returned.

    The conversion is not idempotent: calling this twice on the same frame
    converts the already-converted values again.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ELEMENT`` and a numeric ``DATA_VALUE`` column.
    """
    # ELEMENT code -> converter. Every converter is plain arithmetic, so it is
    # applied to the whole selected Series at once rather than value by value.
    converters = {
        'PRCP': tmm_to_inches,  # tenths of mm -> inches
        'SNOW': mm_to_inches,   # mm -> inches
        'SNWD': mm_to_inches,   # mm -> inches
        'TMAX': tc_to_f,        # tenths of degrees C -> fahrenheit
        'TMIN': tc_to_f,        # tenths of degrees C -> fahrenheit
        'AWND': mps_to_mph,     # tenths of meters per second -> miles per hour
        'TAVG': tc_to_f,        # tenths of degrees C -> fahrenheit
    }

    # The converted values are fractional. Cast the column to float up front so
    # the assignments below never have to upcast an integer column, which newer
    # pandas versions warn about or refuse.
    df['DATA_VALUE'] = df['DATA_VALUE'].astype(float)

    for element, to_imperial in converters.items():
        rows = df['ELEMENT'] == element
        df.loc[rows, 'DATA_VALUE'] = to_imperial(df.loc[rows, 'DATA_VALUE'])

In [10]:
def rename_columns(df):
    """Relabel the ``ELEMENT`` codes in place with unit-suffixed names.

    Despite the function's name, no column is renamed: only the values stored
    in the ``ELEMENT`` column change (e.g. ``PRCP`` -> ``PRCP_INCH``). Codes
    that are not in the mapping are left as they are. Nothing is returned.
    """
    new_elements = {'PRCP':'PRCP_INCH', 'SNOW':'SNOW_INCH', 'SNWD':'SNWD_INCH',
                    'TMAX':'TMAX_F', 'TMIN':'TMIN_F', 'TAVG':'TAVG_F', 'AWND':'AWND_MPH'}
    # Assign the result back to the column. Calling replace(..., inplace=True)
    # on df['ELEMENT'] acts on a selected Series, which is not guaranteed to
    # write through to df (it does not under pandas Copy-on-Write).
    df['ELEMENT'] = df['ELEMENT'].replace(new_elements)

In [11]:
def write_to_file(filename, df):
    """Save ``df`` as ``<name>_converted.csv`` inside the global ``out_dir``.

    Parameters
    ----------
    filename : str
        Name of the source file. A trailing ``.csv`` is removed before
        ``_converted.csv`` is appended; any other name is used unchanged as
        the stem.
    df : pandas.DataFrame
        Frame to write. The index is not written.
    """
    extension = '.csv'
    # Strip only the trailing extension. str.replace would also delete a
    # ".csv" that appears in the middle of the name.
    if filename.endswith(extension):
        stem = filename[:-len(extension)]
    else:
        stem = filename
    destination = os.path.join(out_dir, stem + '_converted.csv')
    df.to_csv(destination, index=False)

In [13]:
# Run the whole pipeline on every visible regular file found in in_dir.
for filename in os.listdir(in_dir):
    # ignore hidden files
    if filename.startswith('.'):
        continue

    f = os.path.join(in_dir, filename)
    # Skip anything that is not a regular file (e.g. a sub-directory).
    if not os.path.isfile(f):
        continue

    df = pd.read_csv(f)
    # Each step is called only for its effect on df; return values are unused.
    filter_conditions(df)
    drop_columns(df)
    convert(df)
    rename_columns(df)
    write_to_file(filename, df)