## Southwest USA Daily Normals
* Data Source: https://www.ncdc.noaa.gov/cdo-web/search?datasetid=NORMAL_DLY

In [30]:
# Dependencies
import pandas as pd
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [31]:
# Read in first dataset
df1 = pd.read_csv('daily-normals-SW-1.csv')
df1.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV
0,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-01,,,,,,
1,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-02,,,,,,
2,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-03,,,,,,
3,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-04,,,,,,
4,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-05,,,,,,


In [32]:
# Read in second dataset
df2 = pd.read_csv('daily-normals-SW-2.csv')
df2.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV
0,USC00312827,"ENFIELD, NC US",36.1686,-77.675,33.5,01-01,,,,,,
1,USC00312827,"ENFIELD, NC US",36.1686,-77.675,33.5,01-02,,,,,,
2,USC00312827,"ENFIELD, NC US",36.1686,-77.675,33.5,01-03,,,,,,
3,USC00312827,"ENFIELD, NC US",36.1686,-77.675,33.5,01-04,,,,,,
4,USC00312827,"ENFIELD, NC US",36.1686,-77.675,33.5,01-05,,,,,,


In [33]:
# Read in third dataset
df3 = pd.read_csv('daily-normals-SW-3.csv')
df3.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV
0,USC00406271,"MORRISTOWN RADIO WCR, TN US",36.2067,-83.3325,409.7,01-01,36.0,10.1,45.7,11.0,26.4,10.9
1,USC00406271,"MORRISTOWN RADIO WCR, TN US",36.2067,-83.3325,409.7,01-02,36.0,10.1,45.6,11.0,26.3,10.9
2,USC00406271,"MORRISTOWN RADIO WCR, TN US",36.2067,-83.3325,409.7,01-03,35.9,10.1,45.6,10.9,26.2,10.9
3,USC00406271,"MORRISTOWN RADIO WCR, TN US",36.2067,-83.3325,409.7,01-04,35.9,10.0,45.6,10.9,26.2,10.9
4,USC00406271,"MORRISTOWN RADIO WCR, TN US",36.2067,-83.3325,409.7,01-05,35.8,10.0,45.6,10.9,26.1,10.9


In [58]:
# Combine datasets
df = pd.concat([df1, df2, df3])
df

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV
0,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-01,,,,,,
1,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-02,,,,,,
2,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-03,,,,,,
3,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-04,,,,,,
4,USC00441955,"CONCORD 4 SSW, VA US",37.2819,-78.9591,248.4,01-05,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
218103,USC00017947,"SULLIGENT, AL US",33.8998,-88.1326,106.7,12-27,,,,,,
218104,USC00017947,"SULLIGENT, AL US",33.8998,-88.1326,106.7,12-28,,,,,,
218105,USC00017947,"SULLIGENT, AL US",33.8998,-88.1326,106.7,12-29,,,,,,
218106,USC00017947,"SULLIGENT, AL US",33.8998,-88.1326,106.7,12-30,,,,,,


In [59]:
# Check for missing values
df.count()

STATION            418300
NAME               418300
LATITUDE           418300
LONGITUDE          418300
ELEVATION          418300
DATE               418300
DLY-TAVG-NORMAL    342576
DLY-TAVG-STDDEV    261690
DLY-TMAX-NORMAL    342576
DLY-TMAX-STDDEV    261690
DLY-TMIN-NORMAL    342576
DLY-TMIN-STDDEV    261690
dtype: int64

In [80]:
# Drop all rows that are missing daily normals (avg, max, min)
df_clean_1 = df.dropna(subset=['DLY-TAVG-NORMAL', 'DLY-TMAX-NORMAL', 'DLY-TMIN-NORMAL'])

In [69]:
# Check for additional missing values
df_clean_1.count()

STATION            342576
NAME               342576
LATITUDE           342576
LONGITUDE          342576
ELEVATION          342576
DATE               342576
DLY-TAVG-NORMAL    342576
DLY-TAVG-STDDEV    261690
DLY-TMAX-NORMAL    342576
DLY-TMAX-STDDEV    261690
DLY-TMIN-NORMAL    342576
DLY-TMIN-STDDEV    261690
dtype: int64

In [71]:
df_clean_1

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV
366,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-01,29.8,10.3,38.6,11.8,21.0,10.4
367,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-02,29.7,10.3,38.5,11.8,20.9,10.5
368,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-03,29.7,10.4,38.4,11.9,20.9,10.6
369,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-04,29.6,10.4,38.4,11.9,20.8,10.6
370,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-05,29.5,10.5,38.3,11.9,20.8,10.7
...,...,...,...,...,...,...,...,...,...,...,...,...
217371,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-27,38.3,,47.8,,28.7,
217372,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-28,38.1,,47.7,,28.6,
217373,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-29,38.0,,47.6,,28.5,
217374,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-30,37.9,,47.4,,28.4,


In [72]:
import re
def findState(string):
    regex = re.compile(',\s([A-Z][A-Z])')
    state = regex.findall(string)
    return state[0]

In [81]:
# Pull state out of station name and add to state column
df_clean_2 = df_clean_1.copy()
df_clean_2['STATE'] = df_clean_2['NAME'].apply(findState)
df_clean_2

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV,STATE
366,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-01,29.8,10.3,38.6,11.8,21.0,10.4,WV
367,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-02,29.7,10.3,38.5,11.8,20.9,10.5,WV
368,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-03,29.7,10.4,38.4,11.9,20.9,10.6,WV
369,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-04,29.6,10.4,38.4,11.9,20.8,10.6,WV
370,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,289.6,01-05,29.5,10.5,38.3,11.9,20.8,10.7,WV
...,...,...,...,...,...,...,...,...,...,...,...,...,...
217371,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-27,38.3,,47.8,,28.7,,TN
217372,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-28,38.1,,47.7,,28.6,,TN
217373,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-29,38.0,,47.6,,28.5,,TN
217374,USC00402711,"EAGLEVILLE 1 SW, TN US",35.7285,-86.6435,243.8,12-30,37.9,,47.4,,28.4,,TN


In [82]:
# Review sample to verify accurate state assignment
df_test_states = df_clean_2.sample(20)
df_test_states

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DLY-TAVG-NORMAL,DLY-TAVG-STDDEV,DLY-TMAX-NORMAL,DLY-TMAX-STDDEV,DLY-TMIN-NORMAL,DLY-TMIN-STDDEV,STATE
30275,USW00013877,"BRISTOL TRI CITY AIRPORT, TN US",36.4731,-82.4044,457.2,09-25,63.6,6.1,75.9,7.0,51.3,7.4,TN
34503,USC00409502,"WAYNESBORO, TN US",35.3041,-87.7591,228.6,04-15,57.0,8.6,71.3,9.6,42.6,9.9,TN
58253,USC00467372,"R.D. BAILEY LAKE, WV US",37.5973,-81.8204,426.7,03-02,37.9,,50.4,,25.5,,WV
162961,USC00013620,"HALEYVILLE, AL US",34.2314,-87.6352,280.4,04-20,63.8,7.7,74.8,8.3,52.9,8.4,AL
19564,USC00401950,"COLLIERVILLE, TN US",35.0781,-89.6653,91.4,06-20,77.7,4.5,88.2,5.2,67.2,4.9,TN
196892,USC00153430,"GREENSBURG, KY US",37.2571,-85.5012,178.3,01-10,34.1,11.4,43.9,12.5,24.4,12.1,KY
16733,USC00466591,"OAK HILL, WV US",37.9715,-81.1507,621.8,09-21,61.7,6.6,72.5,7.4,50.9,7.7,WV
158660,USC00150422,"BARREN RIVER LAKE, KY US",36.8977,-86.1247,187.5,07-19,79.7,4.5,91.8,5.3,67.7,4.9,KY
124985,USC00382386,"DILLON, SC US",34.4091,-79.3563,36.0,07-04,80.2,4.0,91.7,5.0,68.7,4.4,SC
58525,USC00315996,"MURFREESBORO, NC US",36.4521,-77.0803,30.5,11-29,7.9,-12.8,14.3,-12.3,1.6,-12.3,NC


In [83]:
# Determine states included in dataset
states = df_clean_2['STATE'].unique()
states

array(['WV', 'VA', 'SC', 'MD', 'NC', 'TN', 'GA', 'AL', 'KY', 'IN', 'IL'],
      dtype=object)

In [84]:
# Drop rows associated with states not in the Southwest
for state in ['MD', 'IN', 'IL']:
    df_clean_2.drop(df_clean_2[df_clean_2['STATE'] == state].index, inplace=True)

# Check that unwanted states were dropped
df_clean_2['STATE'].unique()

array(['WV', 'VA', 'SC', 'NC', 'TN', 'GA', 'AL', 'KY'], dtype=object)

## Find Counties and Zip Codes for Stations
Reference: https://towardsdatascience.com/reverse-geocoding-in-python-a915acf29eb6

In [15]:
# Pull out dataframe of stations, latitude and longitude
stations = df_clean_2[['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'STATE']].drop_duplicates()
stations

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,STATE
0,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,WV
366,USC00442009,"CORBIN, VA US",38.2022,-77.3747,VA
732,USC00440187,"AMELIA COURTHOUSE 1, VA US",37.3451,-77.9781,VA
1098,USC00442245,"DANVILLE, VA US",36.5869,-79.3886,VA
1464,USC00466212,"MORGANTOWN LOCK AND DAM, WV US",39.6203,-79.9698,WV
...,...,...,...,...,...
340746,USC00014798,"LIVINGSTON, AL US",32.5811,-88.1897,AL
341112,USC00152214,"DIX DAM, KY US",37.7858,-84.7077,KY
341478,USC00150397,"BARDSTOWN 5 E, KY US",37.8194,-85.3847,KY
341844,USC00152575,"ETOILE, KY US",36.8276,-85.8975,KY


In [16]:
# Create column that has coordinates combined in a string
stations['COORD'] = stations['LATITUDE'].map(str) + ', ' + stations['LONGITUDE'].map(str)
stations

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,STATE,COORD
0,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,WV,"39.5064, -78.3158"
366,USC00442009,"CORBIN, VA US",38.2022,-77.3747,VA,"38.2022, -77.3747"
732,USC00440187,"AMELIA COURTHOUSE 1, VA US",37.3451,-77.9781,VA,"37.3451, -77.9781"
1098,USC00442245,"DANVILLE, VA US",36.5869,-79.3886,VA,"36.5869, -79.3886"
1464,USC00466212,"MORGANTOWN LOCK AND DAM, WV US",39.6203,-79.9698,WV,"39.6203, -79.9698"
...,...,...,...,...,...,...
340746,USC00014798,"LIVINGSTON, AL US",32.5811,-88.1897,AL,"32.5811, -88.1897"
341112,USC00152214,"DIX DAM, KY US",37.7858,-84.7077,KY,"37.7858, -84.7077"
341478,USC00150397,"BARDSTOWN 5 E, KY US",37.8194,-85.3847,KY,"37.8194, -85.3847"
341844,USC00152575,"ETOILE, KY US",36.8276,-85.8975,KY,"36.8276, -85.8975"


In [18]:
stations_test = stations.sample(2)

In [27]:
# Set up reverse geocoding function
locator = Nominatim(user_agent='myGeocoder', timeout=10)
rgeocode = RateLimiter(locator.reverse, min_delay_seconds=0.001)

# Create columns for county and zip code
stations['COUNTY'] = ''
stations['ZIP'] = ''
missing_geocoding = []

# Pull county and zip code for each station
for index, row in stations.iterrows():
    try:
        coord = row['COORD']
        address = rgeocode(coord)
        county = address.raw['address']['county']
        zipcode = address.raw['address']['postcode']
        stations.loc[index, 'COUNTY'] = county
        stations.loc[index, 'ZIP'] = zipcode
        missing_geocoding.append(row['NAME'])
        print(f"{row['NAME']} complete.")
    except:
        print(f"Cannot find {row['NAME']}.")
    
print('Reverse geocoding complete.')

Cannot find CACAPON STATE PARK 2, WV US.
CORBIN, VA US complete.
AMELIA COURTHOUSE 1, VA US complete.
Cannot find DANVILLE, VA US.
MORGANTOWN LOCK AND DAM, WV US complete.
ROWLESBURG 1, WV US complete.
HUNTINGTON SWG PLANT, WV US complete.
LONDON LOCKS, WV US complete.
BREAKS INTERSTATE PARK, VA US complete.
BOSTON 4 SE, VA US complete.
AMELIA 8 NE, VA US complete.
CHARLESTON INTL. AIRPORT, SC US complete.
MUSTOE 1 SW, VA US complete.
SANDHILL RESEARCH, SC US complete.
FLAT TOP, WV US complete.
OCEANA NAS, VA US complete.
GREENVILLE DOWNTOWN AIRPORT, SC US complete.
COLUMBIA METROPOLITAN AIRPORT, SC US complete.
FENTRESS NAVAL AUXILIARY FIELD, VA US complete.
BUCKEYE, WV US complete.
CHARLESTON WSFO, WV US complete.
CLINTWOOD 1 W, VA US complete.
SOUTH BOSTON, VA US complete.
BIG STONE GAP, VA US complete.
GREENBAY 3 NE, VA US complete.
PULASKI 2 E, VA US complete.
STONEWALL JACKSON DAM, WV US complete.
FREDERICKSBURG SEWAGE, VA US complete.
RIPLEY, WV US complete.
WILLIAMSON, WV US co

BLACKVILLE 3 W, SC US complete.
RADFORD 3 N, VA US complete.
LOST RIVER, WV US complete.
MARTINSVILLE FILTER PLANT, VA US complete.
ANSTEAD HAWKS NEST STATE PARK, WV US complete.
Cannot find RICHWOOD 1 SSE, WV US.
Cannot find JOCASSEE 8 WNW, SC US.
RICHLANDS, VA US complete.
STAFFORDSVILLE 3 ENE, VA US complete.
MORGANTOWN HART FIELD, WV US complete.
MARTINSBURG EASTERN WEST VIRGINIA REGIONAL AIRPORT, WV US complete.
LYNCHBURG INTERNATIONAL AIRPORT, VA US complete.
ORANGEBURG 2, SC US complete.
CHATHAM, VA US complete.
BLACKSBURG NATIONAL WEATHER SERVICE OFFICE, VA US complete.
ROCK CAVE 2 NE, WV US complete.
HAMLIN, WV US complete.
PENNINGTON GAP, VA US complete.
FARMVILLE 2 N, VA US complete.
CROZIER, VA US complete.
WAKEFIELD 1 NW, VA US complete.
STONY CREEK 2 N, VA US complete.
LEWISBURG 3 N, WV US complete.
SUMMERSVILLE LAKE, WV US complete.
WINTHROP UNIVERSITY, SC US complete.
WISE 1 SE, VA US complete.
RIDGEVILLE, SC US complete.
MCCORMICK, SC US complete.
YEMASSEE 1 N, SC US c

MOUNTAIN CITY 2, TN US complete.
Cannot find HUNTSVILLE INTERNATIONAL AIRPORT JONES FIELD, AL US.
MONTEREY, TN US complete.
QUITMAN 2 NW, GA US complete.
CLARKESVILLE, GA US complete.
LAWRENCEBURG FILTER PLANT, TN US complete.
COLLIERVILLE, TN US complete.
TROY, AL US complete.
SAVANNAH 6 SW, TN US complete.
THORSBY EXPERIMENTAL STATION, AL US complete.
ELIZABETHTON, TN US complete.
LONDON CORBIN AIRPORT, KY US complete.
COLUMBIA 3 WNW, TN US complete.
STANTON 2 W, KY US complete.
MANCHESTER 4 W, KY US complete.
WAYCROSS WARE CO AIRPORT, GA US complete.
OAK RIDGE ATDD, TN US complete.
CODEN, AL US complete.
MONTEAGLE, TN US complete.
PRINCETON 1 SE, KY US complete.
CROSSVILLE MEMORIAL AIRPORT, TN US complete.
ALBANY SW GEORGIA REGIONAL AIRPORT, GA US complete.
Cannot find ASHBURN 3 ENE, GA US.
PARSONS WATER PLANT, TN US complete.
WARTRACE 6 E, TN US complete.
WARNER PARK, TN US complete.
CEDARTOWN, GA US complete.
ANNISTON METROPOLITAN AIRPORT, AL US complete.
Cannot find ALMA BACON CO

CORNELIA, GA US complete.
BEAVER DAM, KY US complete.
HENDERSON 8 SSW, KY US complete.
FARMERS 2 S, KY US complete.
MC MINNVILLE, TN US complete.
GLADEVILLE, TN US complete.
HAZLEHURST, GA US complete.
DICKSON, TN US complete.
LEITCHFIELD 2 N, KY US complete.
Cannot find FOLKSTON 9 SW, GA US.
Cannot find COOKEVILLE, TN US.
BRENTWOOD, TN US complete.
DAYTON 2 SE, TN US complete.
LYONS, GA US complete.
Cannot find JESUP 8 S, GA US.
HELENA, AL US complete.
MABLETON 1 N, GA US complete.
Cannot find CENTRE, AL US.
HUNTINGDON WATER PLA, TN US complete.
ALEXANDER CITY, AL US complete.
BROWNSVILLE, TN US complete.
CORDELE, GA US complete.
SAUTEE 3 W, GA US complete.
LOUISVILLE WEATHER FORECAST OFFICE, KY US complete.
GERMANTOWN 4 SE, TN US complete.
HEIDELBERG 2 N, KY US complete.
ROCK ISLAND STATE PARK, TN US complete.
Cannot find TOWNSEND 5 S, TN US.
DECATUR 5 SE, AL US complete.
TOCCOA, GA US complete.
HANCEVILLE, AL US complete.
LAFAYETTE 2 W, AL US complete.
HEFLIN, AL US complete.
TROY 2

In [28]:
stations

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,STATE,COORD,COUNTY,ZIP
0,USC00461324,"CACAPON STATE PARK 2, WV US",39.5064,-78.3158,WV,"39.5064, -78.3158",,
366,USC00442009,"CORBIN, VA US",38.2022,-77.3747,VA,"38.2022, -77.3747",Caroline County,22580
732,USC00440187,"AMELIA COURTHOUSE 1, VA US",37.3451,-77.9781,VA,"37.3451, -77.9781",Amelia County,23002
1098,USC00442245,"DANVILLE, VA US",36.5869,-79.3886,VA,"36.5869, -79.3886",,
1464,USC00466212,"MORGANTOWN LOCK AND DAM, WV US",39.6203,-79.9698,WV,"39.6203, -79.9698",Monongalia County,26501
...,...,...,...,...,...,...,...,...
340746,USC00014798,"LIVINGSTON, AL US",32.5811,-88.1897,AL,"32.5811, -88.1897",Sumter County,35470
341112,USC00152214,"DIX DAM, KY US",37.7858,-84.7077,KY,"37.7858, -84.7077",,
341478,USC00150397,"BARDSTOWN 5 E, KY US",37.8194,-85.3847,KY,"37.8194, -85.3847",Nelson County,40004
341844,USC00152575,"ETOILE, KY US",36.8276,-85.8975,KY,"36.8276, -85.8975",Barren County,42141


In [85]:
missing_geocode = stations.loc[stations['COUNTY'] == '']
missing_geocode

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,STATE,COORD,COUNTY,ZIP
0,USC00461324,"CACAPON STATE PARK 2, WV US",39.50640,-78.31580,WV,"39.5064, -78.3158",,
1098,USC00442245,"DANVILLE, VA US",36.58690,-79.38860,VA,"36.5869, -79.3886",,
17202,USC00440720,"BIG MEADOWS, VA US",38.52160,-78.43550,VA,"38.5216, -78.4355",,
19032,USC00461900,"COOPERS ROCK STATE FOREST, WV US",39.67722,-79.77194,WV,"39.67722, -79.77194",,
20130,USC00464971,"KUMBRABOW, WV US",38.63110,-80.08560,WV,"38.6311, -80.0856",,
...,...,...,...,...,...,...,...,...
335256,USW00003811,"JACKSON MCKELLAR SIPES REGIONAL AIRPORT, TN US",35.59300,-88.91670,TN,"35.593, -88.9167",,
337086,USW00093808,"BOWLING GREEN WARREN CO AIRPORT, KY US",36.96470,-86.42380,KY,"36.9647, -86.4238",,
338184,USC00016988,"ROBERTSDALE, AL US",30.56540,-87.70180,AL,"30.5654, -87.7018",,
340014,USC00403938,"HARTSVILLE, TN US",36.37556,-86.18083,TN,"36.37556, -86.18083",,


In [16]:
# Store cleaned dataset in csv file
df_clean_2.to_csv('sw-weather-cleaned.csv', index=False)