In [1]:
import pandas as pd
import json
import requests
import os
import time
import calendar
import datetime
from datetime import date

In [2]:
# Run configuration: the year range determines which raw CSV is read.
# Both file names below are built from it, so adjust the years (or the two
# names directly) before re-running; otherwise the wrong input is loaded or
# an earlier cleaned file may be overwritten.
start_year, end_year = 2021, 2022

year_span = f"{start_year}-{end_year}"
file_name = f"{year_span}.csv"      # raw input, looked up under Resources/raw_csv/
new_file_name = f"{year_span}.csv"  # name meant for the cleaned output file

# Load the raw incident data for the configured year range.
raw_csv_path = f"Resources/raw_csv/{file_name}"
csv_df = pd.read_csv(raw_csv_path)


In [3]:
# Reserve an empty 'Timestamp' column just before the current last column so
# it can be filled in afterwards.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run on the same kernel.
timestamp = None
if 'Timestamp' not in csv_df.columns:
    csv_df.insert((len(csv_df.columns)-1), 'Timestamp', timestamp)
csv_df.head()

Unnamed: 0,Acres Burned,Latitude,Longitude,Timestamp,Incident Start
0,250.0,34.20955,-118.83967,,2021-01-14T16:00:00Z
1,23.0,37.25086,-119.51982,,2021-01-19T12:08:00Z
2,20.0,37.08931,-122.10509,,2021-01-19T06:48:00Z
3,15.0,37.18259,-122.28823,,2021-01-18T21:00:00Z
4,22.0,37.17749,-122.1756,,2021-01-18T21:00:00Z


In [4]:
# Convert the 'Incident Start' strings (e.g. 2021-01-14T16:00:00Z) to integer
# Unix timestamps (seconds since 1970-01-01 UTC) in one vectorized pass rather
# than looping over rows and writing each cell with .loc.
# The explicit format keeps the parsing strict: a string that does not match
# raises instead of being guessed at. The trailing 'Z' is matched literally
# and utc=True marks the parsed values as UTC.
# NOTE: a missing 'Incident Start' becomes NaT here and therefore NaN in
# 'Timestamp' (the row-wise version raised on such a value).
incident_start_utc = pd.to_datetime(
    csv_df['Incident Start'],
    format='%Y-%m-%dT%H:%M:%SZ',
    utc=True,
)

# Whole seconds elapsed since the Unix epoch; floor division by a one-second
# Timedelta yields integers independent of the underlying datetime resolution.
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
csv_df['Timestamp'] = (incident_start_utc - unix_epoch) // pd.Timedelta(seconds=1)
csv_df.head()

Unnamed: 0,Acres Burned,Latitude,Longitude,Timestamp,Incident Start
0,250.0,34.20955,-118.83967,1610640000,2021-01-14T16:00:00Z
1,23.0,37.25086,-119.51982,1611058080,2021-01-19T12:08:00Z
2,20.0,37.08931,-122.10509,1611038880,2021-01-19T06:48:00Z
3,15.0,37.18259,-122.28823,1611003600,2021-01-18T21:00:00Z
4,22.0,37.17749,-122.1756,1611003600,2021-01-18T21:00:00Z


In [5]:
# Month bin edges (`bins`) and their labels (`names`) for pd.cut, generated
# from start_year / end_year so they follow the configured year range instead
# of being fixed to 2021-2022.
#
# Each edge is the Unix timestamp of local midnight on the 1st of a month in
# US Pacific time. That matches the literal list this replaces, e.g.
#   1609488000 == 2021-01-01 00:00 PST (08:00 UTC)
#   1617260400 == 2021-04-01 00:00 PDT (07:00 UTC)
# NOTE(review): the Pacific time zone is inferred from those values - confirm
# that month boundaries are meant to follow local time rather than UTC.
BIN_TIMEZONE = 'America/Los_Angeles'

# Spelled out explicitly so the labels do not depend on the system locale.
MONTH_ABBREVIATIONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

bin_years = range(start_year, end_year + 1)

# One edge per month start, plus January 1st of the following year to close
# the final (December) bin: 12 * n_years + 1 edges for 12 * n_years labels.
month_starts = [
    pd.Timestamp(f"{year:04d}-{month:02d}-01", tz=BIN_TIMEZONE)
    for year in bin_years
    for month in range(1, 13)
]
month_starts.append(pd.Timestamp(f"{end_year + 1:04d}-01-01", tz=BIN_TIMEZONE))

bins = [int(edge.timestamp()) for edge in month_starts]

names = [f"{abbr}-{year}" for year in bin_years for abbr in MONTH_ABBREVIATIONS]

In [6]:
# Label each incident with the month bin its Unix timestamp falls into.
#
# 'Timestamp' can be object dtype (the column is first created from None), so
# it is coerced to a numeric Series before binning.
timestamp_seconds = pd.to_numeric(csv_df['Timestamp'])

# right=False makes every bin [month start, next month start): a timestamp
# that lies exactly on a month boundary is assigned to the month that begins
# there. With the default right-closed bins it was counted in the previous
# month. Values outside the first/last edge become NaN.
csv_df['Month'] = pd.cut(timestamp_seconds, bins, labels=names, right=False)
csv_df.head()

Unnamed: 0,Acres Burned,Latitude,Longitude,Timestamp,Incident Start,Month
0,250.0,34.20955,-118.83967,1610640000,2021-01-14T16:00:00Z,Jan-2021
1,23.0,37.25086,-119.51982,1611058080,2021-01-19T12:08:00Z,Jan-2021
2,20.0,37.08931,-122.10509,1611038880,2021-01-19T06:48:00Z,Jan-2021
3,15.0,37.18259,-122.28823,1611003600,2021-01-18T21:00:00Z,Jan-2021
4,22.0,37.17749,-122.1756,1611003600,2021-01-18T21:00:00Z,Jan-2021


In [14]:
# Build the cleaned frame as an independent copy holding these six columns,
# with 'Month' moved to the front; the row index is carried over unchanged.
cleaned_columns = [
    'Month',
    'Acres Burned',
    'Latitude',
    'Longitude',
    'Timestamp',
    'Incident Start',
]
cleaned_df = csv_df[cleaned_columns].copy()
cleaned_df

Unnamed: 0,Month,Acres Burned,Latitude,Longitude,Timestamp,Incident Start
0,Jan-2021,250.0,34.20955,-118.83967,1610640000,2021-01-14T16:00:00Z
1,Jan-2021,23.0,37.25086,-119.51982,1611058080,2021-01-19T12:08:00Z
2,Jan-2021,20.0,37.08931,-122.10509,1611038880,2021-01-19T06:48:00Z
3,Jan-2021,15.0,37.18259,-122.28823,1611003600,2021-01-18T21:00:00Z
4,Jan-2021,22.0,37.17749,-122.17560,1611003600,2021-01-18T21:00:00Z
...,...,...,...,...,...,...
305,Sep-2022,160.0,40.44109,-121.81822,1662733860,2022-09-09T14:31:00Z
306,Sep-2022,48.0,39.19020,-120.82660,1663073460,2022-09-13T12:51:00Z
307,Sep-2022,29.0,39.99290,-121.21220,1663426712,2022-09-17T14:58:32Z
308,Sep-2022,30.0,41.49870,-122.34140,1664268890,2022-09-27T08:54:50Z


In [8]:
# Write the cleaned data under Resources/cleaned_csv/ using new_file_name, so
# that renaming it in the configuration cell really does redirect the output
# (the path was previously rebuilt from the years and ignored that name).
# The row index is not written; the header row is.
cleaned_dir = "Resources/cleaned_csv"
os.makedirs(cleaned_dir, exist_ok=True)  # to_csv does not create missing folders
cleaned_df.to_csv(f"{cleaned_dir}/{new_file_name}", index=False, header=True)