In [64]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook

In [65]:
#Load the raw observations for one county from its CSV file.
#County files this notebook has been pointed at (one CSV per county, named <County>.csv):
#  Bernalillo_County, Clark_County, Cook_County, Dallas_County, Davidson_County,
#  Denver_County, Fulton_County, Hennepin_County, Jackson_County, King_County,
#  Los_Angeles_County, Maricopa_County, Marion_County, Mecklenburg_County,
#  Miami_Dade_County, New_York_County, Philadelphia_County, San_Francisco_County,
#  Suffolk_County, Wayne_County

#File name of the county being analysed; the county name is derived from it later on
csvCity = "San_Francisco_County.csv"
cityDf = pd.read_csv(filepath_or_buffer=csvCity)

In [66]:
#Columns kept for the analysis: the observation timestamp and the dry bulb temperature
newColumns = [
    'DATE',
    'HourlyDryBulbTemperature',
]

#Build a narrower data frame holding only the columns listed above
shortCityDf = cityDf.loc[:, newColumns]
shortCityDf

Unnamed: 0,DATE,HourlyDryBulbTemperature
0,2014-01-01T23:59:00,54.0
1,2014-01-02T23:59:00,56.0
2,2014-01-03T23:59:00,57.0
3,2014-01-04T23:59:00,56.0
4,2014-01-05T23:59:00,57.0
...,...,...
5037,2020-08-13T23:59:00,69.0
5038,2020-08-14T23:59:00,79.0
5039,2020-08-15T23:59:00,75.0
5040,2020-08-16T11:59:00,


In [67]:
#Remove every row that has a null in any column.
#Nulls are dropped rather than filled with zeros, because a zero could later be read as a real temperature.
shortCityDf = shortCityDf.dropna(axis='index', how='any')
shortCityDf

Unnamed: 0,DATE,HourlyDryBulbTemperature
0,2014-01-01T23:59:00,54.0
1,2014-01-02T23:59:00,56.0
2,2014-01-03T23:59:00,57.0
3,2014-01-04T23:59:00,56.0
4,2014-01-05T23:59:00,57.0
...,...,...
5036,2020-08-12T23:59:00,60.0
5037,2020-08-13T23:59:00,69.0
5038,2020-08-14T23:59:00,79.0
5039,2020-08-15T23:59:00,75.0


In [68]:
#Keep only the rows recorded in January 2014 and store them in their own data frame.
#DATE values are ISO formatted strings (YYYY-MM-DDTHH:MM:SS), so the wanted rows are exactly
#those whose DATE starts with '2014-01'; a prefix test is used instead of a substring search.
#.copy() makes the result an independent frame, so columns assigned to it later are not
#written into a slice of shortCityDf (which can raise SettingWithCopyWarning).
CityDf2014=shortCityDf.loc[shortCityDf['DATE'].str.startswith('2014-01'), newColumns].copy()
CityDf2014

Unnamed: 0,DATE,HourlyDryBulbTemperature
0,2014-01-01T23:59:00,54.0
1,2014-01-02T23:59:00,56.0
2,2014-01-03T23:59:00,57.0
3,2014-01-04T23:59:00,56.0
4,2014-01-05T23:59:00,57.0
5,2014-01-06T23:59:00,54.0
6,2014-01-07T23:59:00,55.0
7,2014-01-08T23:59:00,53.0
10,2014-01-09T23:59:00,53.0
11,2014-01-10T23:59:00,55.0


In [69]:
#Convert temperatures into floats. Values that cannot be parsed as numbers become NaN
#(errors='coerce') and the rows holding them are then dropped.
#assign() returns a new data frame rather than writing the column into the filtered subset
#in place, which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
CityDf2014=(
    CityDf2014
    .assign(HourlyDryBulbTemperature=pd.to_numeric(CityDf2014['HourlyDryBulbTemperature'], errors='coerce'))
    .dropna(axis=0)
)
CityDf2014

Unnamed: 0,DATE,HourlyDryBulbTemperature
0,2014-01-01T23:59:00,54.0
1,2014-01-02T23:59:00,56.0
2,2014-01-03T23:59:00,57.0
3,2014-01-04T23:59:00,56.0
4,2014-01-05T23:59:00,57.0
5,2014-01-06T23:59:00,54.0
6,2014-01-07T23:59:00,55.0
7,2014-01-08T23:59:00,53.0
10,2014-01-09T23:59:00,53.0
11,2014-01-10T23:59:00,55.0


In [70]:
#Median of the temperatures in the January data frame
monthlyMedian = CityDf2014.loc[:, 'HourlyDryBulbTemperature'].median()
monthlyMedian

57.0

In [71]:
#Lowest temperature in the January data frame (the month's minimum)
monthlyMin = CityDf2014.loc[:, 'HourlyDryBulbTemperature'].min()
monthlyMin

51.0

In [72]:
#Highest temperature in the January data frame (the month's maximum)
monthlyMax = CityDf2014.loc[:, 'HourlyDryBulbTemperature'].max()
monthlyMax

62.0

In [73]:
#Mean of the temperatures in the January data frame, rounded to one decimal place
monthlyMean = round(
    CityDf2014.loc[:, 'HourlyDryBulbTemperature'].mean(),
    ndigits=1,
)
monthlyMean

56.7

In [74]:
#Get county name from the csv file name by removing the trailing '.csv' extension.
#Only a suffix is stripped: str.replace would also delete '.csv' occurring elsewhere in the name.
#A file name without the extension is used unchanged.
countyName=csvCity[:-len('.csv')] if csvCity.endswith('.csv') else csvCity
countyName

'San_Francisco_County'

In [75]:
#Split each date on its first '-' and take the leading piece (the year) of the first row.
#.iloc[0] selects the first row by position. Indexing the Series with [0] looks up the row
#labelled 0 and raises KeyError when that row was removed by an earlier dropna.
#The year is kept as a string, e.g. '2014'.
year=CityDf2014['DATE'].str.split(pat='-', n=1).iloc[0][0]
year

'2014'

In [76]:
#Gather the county name, the year and the four January statistics computed above
#into a single record; each key becomes a column of the summary data frame
CityDf2014Summary = [
    {
        'County': countyName,
        'Year': year,
        'JanMinimumTemperature': monthlyMin,
        'JanMaximumTemperature': monthlyMax,
        'JanMeanTemperature': monthlyMean,
        'JanMedianTemperature': monthlyMedian,
    }
]

#Build the one-row data frame and use the county name as its index
CityDf2014SummaryDf = pd.DataFrame(CityDf2014Summary).set_index(keys='County')
CityDf2014SummaryDf

Unnamed: 0_level_0,Year,JanMinimumTemperature,JanMaximumTemperature,JanMeanTemperature,JanMedianTemperature
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
San_Francisco_County,2014,51.0,62.0,56.7,57.0
