In [1]:
import glob
import os

class _ClimateFileNotFoundError(FileNotFoundError, IndexError):
    """
    Raised when a known city's climate file is absent from the data directory.

    It also derives from IndexError because pull_location_file previously failed
    with an IndexError in this situation, so existing handlers keep working.
    """


def pull_location_file(location):

    """
    Match a city within CONUS to its climate file and return that file's path.

    The city is translated to a station ID through a built-in dictionary, and the
    file 'USC00<ID>.FLs.52j.tavg' is looked up in the 'CONUS_city_climate_stats'
    folder of the current working directory. Only the '.tavg' file is looked up;
    the function has no option for selecting a different climate variable.

    Inputs:
    location (string): structured "City, ST" in which the state is the two letter
        state abbreviation. It must be one of the keys of the built-in dictionary.

    Returns:
    used_filepath (string): filepath corresponding to the location of the city climate file.

    Raises:
    KeyError: if location is not in the built-in dictionary.
    FileNotFoundError (also an IndexError): if the climate file does not exist.

    """

    ## Load the dictionary with cities and their corresponding Cooperative Observer Identification Numbers.
    #    This dictionary can be expanded to all cities located in the climate directory.

    city_dictionary = {'Raleigh, NC': '317074', 'Albany, NY': '300047',
                       'Seattle, WA': '457478', 'Dallas, TX': '412243',
                       'Salt Lake City, UT': '427578',
                       # 'Bismark' is the spelling this function has always accepted;
                       # it is kept so existing calls still work.
                       'Bismark, ND': '320818', 'Bismarck, ND': '320818',
                       'Kansas City, MO': '234379', 'Flagstaff, AZ': '023009',
                       'Indianapolis, IN': '124260', 'Tallahassee, FL': '088756'}

    # Pull the city's ID, telling the user which cities are available on a miss.
    try:
        desired_ID = city_dictionary[location]
    except KeyError:
        known_cities = ', '.join(sorted(city_dictionary))
        raise KeyError(
            f"Unknown location {location!r}; known locations: {known_cities}"
        ) from None

    # Build the path directly rather than globbing: the name contains no wildcards,
    # and glob would misread characters such as '[' or '*' in the working directory.
    used_filepath = os.path.join(os.getcwd(), 'CONUS_city_climate_stats',
                                 'USC00' + desired_ID + '.FLs.52j.tavg')

    if not os.path.isfile(used_filepath):
        raise _ClimateFileNotFoundError(
            f"No climate file for {location!r} (ID {desired_ID}) at {used_filepath}"
        )

    return used_filepath


In [81]:
import glob
import os
import pandas as pd
# from pull_location_file import pull_location_file  # left disabled: pull_location_file is defined earlier in this notebook

def parse_climate_data(used_filename):

    """
    Parse a fixed-width city climate data file into a Pandas dataframe.

    Each row holds a station ID, a year and twelve monthly fields. A monthly field
    is a number that may be followed by flag characters; only the leading number is
    kept, whatever the flag characters are. Values of -9999 are interpreted as
    missing values (with or without a trailing flag). Every remaining monthly value
    is divided by 100.

    Parameters:
        used_filename (String) - The filename of the city climate file to be parsed.
    Returns:
        (df) - A DataFrame with the columns 'Station Climate ID', 'Year' and one
               float column per month containing the scaled values (NaN if missing).
    """

    months = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
              'August', 'September', 'October', 'November', 'December']

    headings = ['Station Climate ID', 'Year'] + months

    # We need to specify the widths as the default colspec 'infer' does not correctly pick up rarely seen missing data values.
    # 11 characters for the ID, 5 for the year, then 9 per monthly field.
    widths = [11, 5] + [9] * len(months)

    df = pd.read_fwf(used_filename, names=headings, header=None, widths=widths, na_values=[-9999])

    for month in months:
        # A column without any flags is already numeric, a flagged one holds text;
        # going through str handles both the same way ("nan" has no digits -> NaN).
        leading_number = df[month].astype(str).str.extract(r'^\s*(-?\d+(?:\.\d+)?)', expand=False)
        values = pd.to_numeric(leading_number, errors='coerce')

        # na_values only catches a bare "-9999"; a flagged one (e.g. "-9999X")
        # reaches this point as -9999 and must be masked before scaling.
        df[month] = values.mask(values == -9999) / 100

    return df

# Example run: look up the climate file for Flagstaff, AZ and parse it.
# The parsed DataFrame is the last expression of the cell, so the notebook displays it.
used_filepath = pull_location_file('Flagstaff, AZ')
parse_climate_data(used_filepath)


    Station Climate ID  Year January February March April   May  June  July  \
0          USC00023009  1904     NaN      NaN   NaN   NaN   NaN   NaN   NaN   
1          USC00023009  1905    -159      NaN   147   417   594  1218  1688   
2          USC00023009  1906    -470     -160    72   400   734  1138  1677   
3          USC00023009  1907    -389      119   171   575   639  1047  1684   
4          USC00023009  1908    -163     -158   242   488   NaN  1205  1713   
..                 ...   ...     ...      ...   ...   ...   ...   ...   ...   
114        USC00023009  2018      55      -12   250   756   955  1548  1852   
115        USC00023009  2019     -58     -339   280   671   632  1278  1790   
116        USC00023009  2020    -144        0   182   591  1053  1408  1822   
117        USC00023009  2021    -209      -90   110   648   903  1615  1887   
118        USC00023009  2022    -103     -136   261   631  1048  1555  1873   

    August September October November December  
0 

Unnamed: 0,Station Climate ID,Year,January,February,March,April,May,June,July,August,September,October,November,December
0,USC00023009,1904,,,,,,,,,,,,-2.10
1,USC00023009,1905,-1.59,,1.47,4.17,5.94,12.18,16.88,16.69,12.98,7.05,0.52,-5.40
2,USC00023009,1906,-4.70,-1.60,0.72,4.00,7.34,11.38,16.77,14.76,11.00,6.11,-1.18,-1.54
3,USC00023009,1907,-3.89,1.19,1.71,5.75,6.39,10.47,16.84,15.93,12.40,8.28,0.90,-1.10
4,USC00023009,1908,-1.63,-1.58,2.42,4.88,,12.05,17.13,15.99,11.99,5.18,0.57,-2.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,USC00023009,2018,0.55,-0.12,2.50,7.56,9.55,15.48,18.52,17.02,13.37,6.71,0.95,-1.59
115,USC00023009,2019,-0.58,-3.39,2.80,6.71,6.32,12.78,17.90,17.58,13.55,5.90,1.96,-2.08
116,USC00023009,2020,-1.44,0.00,1.82,5.91,10.53,14.08,18.22,18.67,13.34,9.29,3.04,-2.64
117,USC00023009,2021,-2.09,-0.90,1.10,6.48,9.03,16.15,18.87,16.85,14.19,5.99,3.72,-0.30
