## SETUP and Index Variables

In [None]:
from pathlib import Path
import csv
from datetime import datetime
import matplotlib.pyplot as plt

# Input file and the location label reused in plot titles and output file names
location = 'san_francisco_intl'
file_name = 'san_francisco_intl_2021_full.csv'
path = Path('../data/weather_data') / file_name

# Load the whole file up front and hand the lines to a CSV reader
lines = path.read_text().splitlines()
reader = csv.reader(lines)

# Consume the header; after this the reader is positioned on the first data row
header_row = next(reader)
print(f'header row: {header_row}')

# Resolve column positions by name instead of hard-coding them
tmax_index, tmin_index, prcp_index, date_index = (
    header_row.index(column) for column in ('TMAX', 'TMIN', 'PRCP', 'DATE')
)

# List every column with its position, to make it easy to see what the file contains
for index, column_header in enumerate(header_row):
    print(f'{index} {column_header}')


## Extracting and reading data

Date - using the `datetime` module, we parse each date with `datetime.strptime(date_string, format)`. We will use `strptime(date_string, '%Y-%m-%d')`,
as that is how dates are formatted in this CSV file.

Error handling - we need to handle missing or erroneous data in CSV files appropriately, because real-world data usually comes with
some noise, such as missing or incorrect values.
In this case, a common occurrence is a missing high or low temperature, stored as an empty string `''`, which raises a
`ValueError` when passed to `int()`. There are many ways to handle errors like that. One option is a try-except-else block.

In [None]:

# Extract dates, highs (TMAX), lows (TMIN) and precipitation (PRCP).
# Iteration starts on the first data row because the header was already
# consumed by next(reader).
dates, highs, lows, prcp = [], [], [], []
for row in reader:
    # Look the date column up by name (date_index) like the other columns,
    # instead of relying on a hard-coded position.
    current_date = datetime.strptime(row[date_index], '%Y-%m-%d')
    # Error handling: an empty field ('') makes int()/float() raise ValueError
    try:
        high = int(row[tmax_index])
        low = int(row[tmin_index])
        precipitation = float(row[prcp_index])
    except ValueError:
        # Skip the whole day so the four lists stay aligned by position
        print(f'missing data for {current_date}')
    else:
        dates.append(current_date)
        highs.append(high)
        lows.append(low)
        prcp.append(precipitation)



## Plotting the data using matplotlib

For high and low temperatures, with an area filled between them.

In [None]:
# Daily highs and lows on one axes, with the band between them shaded
plt.style.use('ggplot')
fig, ax = plt.subplots()

# Plotting both highs and lows against the same dates
ax.plot(dates, highs, color='red', alpha=0.5)
ax.plot(dates, lows, color='blue', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Formatting plot (title now has a space after the comma before the location)
ax.set_title(f'Daily high and low temperatures, {location}, 2021', fontsize=24)
ax.set_xlabel('', fontsize=16)
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(labelsize=16)
# Slant the date tick labels so they do not overlap
fig.autofmt_xdate()

# Fixed y-axis range so plots for different places can be compared
ax.set_ylim(0, 140)

plt.savefig(f'../figures/{location}_temps.png', bbox_inches='tight')
plt.show()

In [None]:
# Precipitation over the year, drawn on its own figure
fig, ax = plt.subplots()
ax.plot(dates, prcp, c='blue')

# Title, y-axis label and a fixed y-range, applied in a single call
ax.set(
    title=f'Precipitation data in {location}, 2021',
    ylabel='Precipitation',
    ylim=(0, 5),
)

# Write the figure to disk, then display it
plt.savefig(f'../figures/{location}_prcp.png')
plt.show()
