In [30]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pickle
import requests

In [23]:
# Load data
data = pd.read_csv('cleaned_wildfire_data.csv')

In [5]:
data

Unnamed: 0.1,Unnamed: 0,X,Y,FireCause,FireDiscoveryDateTime,POOCounty,POOFips,POOState,weather_data,temp,humidity,precip,windspeed,winddir,solarradiation
0,0,-118.180712,33.808985,Unknown,2020/02/28 20:45:40+00,Los Angeles,06037,US-CA,"{""queryCost"":1,""latitude"":33.8089848330001,""lo...",68.5,32.7,0.0,9.1,212.1,335.8
1,3,-117.228592,33.782442,Undetermined,2021/11/25 15:17:32+00,Riverside,06065,US-CA,"{""queryCost"":1,""latitude"":33.7824418820001,""lo...",59.7,9.7,0.0,21.8,29.6,304.2
2,4,-118.309032,33.941815,Undetermined,2022/11/21 11:25:33+00,Los Angeles,06037,US-CA,"{""queryCost"":1,""latitude"":33.9418148400001,""lo...",63.7,25.9,0.0,8.9,314.9,65.3
3,8,-108.270562,47.003940,Human,2019/05/02 21:32:49+00,Petroleum,30069,US-MT,"{""queryCost"":1,""latitude"":47.0039400110001,""lo...",41.9,51.6,0.0,22.0,280.6,275.6
4,10,-115.946014,48.779806,Unknown,2018/05/22 14:21:50+00,Lincoln,30053,US-MT,"{""queryCost"":1,""latitude"":48.779806384,""longit...",65.5,51.6,0.0,7.0,339.9,324.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179431,250337,-95.081657,35.411146,Human,2023/02/28 23:35:00+00,Haskell,40061,US-OK,"{""queryCost"":1,""latitude"":35.411146306,""longit...",60.1,46.3,0.0,16.8,216.6,229.3
179432,250338,-95.288896,33.190561,Undetermined,2023/02/28 22:41:00+00,Franklin,48159,US-TX,"{""queryCost"":1,""latitude"":33.1905614400001,""lo...",64.1,43.8,0.0,10.5,212.8,171.9
179433,250339,-95.278237,31.444536,Undetermined,2023/02/28 22:47:00+00,Houston,48225,US-TX,"{""queryCost"":1,""latitude"":31.4445355400001,""lo...",63.2,70.2,0.0,10.3,188.9,174.7
179434,250340,-89.436166,37.657667,Human,2023/02/28 22:33:12+00,Jackson,17077,US-IL,"{""queryCost"":1,""latitude"":37.657667002,""longit...",54.0,66.9,0.0,12.7,68.1,198.6


In [8]:
df = data['FireCause']!='Human'
len(df)

108724

In [18]:
data['Date'] = pd.to_datetime(data['FireDiscoveryDateTime']).astype(int) / 10**12
# data['FireDiscoveryDateTime']

In [20]:
# Define the features and target
# ['latitude', 'longitude', 'temp', 'FireDiscoveryDateTime', 'precipitation', 'humidity', 'solar_radiation', 'wind_speed']
X = data[['Y', 'X', 'temp', 'Date', 'precip', 'humidity', 'solarradiation', 'windspeed']]
y = data['FireCause']!='Human'

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [21]:
# Define and train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

In [22]:
# Evaluate the model
y_pred = rf_model.predict(X_test)
acc_score = accuracy_score(y_test, y_pred)
print("Accuracy Score:", acc_score)

Accuracy Score: 0.8223328565324813


In [24]:
# Save the model
with open('wildfire_model.pkl', 'wb') as pickle_file:
    pickle.dump(rf_model, pickle_file)

In [None]:
# Load the model and make predictions on new data inputs

new_data = pd.DataFrame([[34.06, -118.23, 75, '2023-03-03', 0.01, 65, 100, 5]], columns=X.columns)
loaded_model = joblib.load('wildfire_model.pkl')
prediction = loaded_model.predict(new_data)
print("New data prediction:", prediction)

In [34]:
raws_data = pd.read_csv('recent_wildfires.csv')

In [35]:
raws_data.columns

Index(['X', 'Y', 'OBJECTID', 'SourceOID', 'ABCDMisc', 'ADSPermissionState',
       'ContainmentDateTime', 'ControlDateTime', 'CreatedBySystem',
       'IncidentSize', 'DiscoveryAcres', 'DispatchCenterID',
       'EstimatedCostToDate', 'FinalAcres', 'FinalFireReportApprovedByTitle',
       'FinalFireReportApprovedByUnit', 'FinalFireReportApprovedDate',
       'FireBehaviorGeneral', 'FireBehaviorGeneral1', 'FireBehaviorGeneral2',
       'FireBehaviorGeneral3', 'FireCause', 'FireCauseGeneral',
       'FireCauseSpecific', 'FireCode', 'FireDepartmentID',
       'FireDiscoveryDateTime', 'FireMgmtComplexity', 'FireOutDateTime',
       'FireStrategyConfinePercent', 'FireStrategyFullSuppPercent',
       'FireStrategyMonitorPercent', 'FireStrategyPointZonePercent',
       'FSJobCode', 'FSOverrideCode', 'GACC', 'ICS209ReportDateTime',
       'ICS209ReportForTimePeriodFrom', 'ICS209ReportForTimePeriodTo',
       'ICS209ReportStatus', 'IncidentManagementOrganization', 'IncidentName',
       'Incide

In [131]:
df = raws_data.filter(['X', 'Y', 'FireCause', 'FireDiscoveryDateTime'])

In [132]:
df

Unnamed: 0,X,Y,FireCause,FireDiscoveryDateTime
0,-118.170692,33.949725,Undetermined,2023/03/02 07:32:34+00
1,-117.251002,33.938963,Undetermined,2023/03/02 08:23:41+00
2,-79.870403,33.201946,Undetermined,2023/03/02 12:47:17+00
3,-82.398564,33.935007,Undetermined,2023/03/02 12:52:02+00
4,-88.964275,31.132366,Human,2023/03/02 12:43:59+00
5,-80.888143,26.306605,Undetermined,2023/03/02 13:39:11+00
6,-88.758966,37.588067,Human,2023/03/02 14:30:25+00
7,-96.479098,36.738286,Human,2023/03/02 15:30:00+00
8,-89.065066,37.726447,Human,2023/03/02 15:37:47+00
9,-89.03128,37.717594,Human,2023/03/02 15:37:47+00


In [151]:
non_human_fires = df[df['FireCause']!='Human']
non_human_fires

Unnamed: 0,X,Y,FireCause,FireDiscoveryDateTime
0,-118.170692,33.949725,Undetermined,2023/03/02 07:32:34+00
1,-117.251002,33.938963,Undetermined,2023/03/02 08:23:41+00
2,-79.870403,33.201946,Undetermined,2023/03/02 12:47:17+00
3,-82.398564,33.935007,Undetermined,2023/03/02 12:52:02+00
5,-80.888143,26.306605,Undetermined,2023/03/02 13:39:11+00
10,-118.209772,33.923195,Undetermined,2023/03/02 16:26:45+00
11,-85.846925,33.516726,Undetermined,2023/03/02 16:21:25+00
12,-85.879915,33.521136,Undetermined,2023/03/02 16:50:03+00
13,-89.411117,31.053617,Undetermined,2023/03/02 06:45:00+00
14,-92.868107,36.649937,Undetermined,2023/03/02 15:35:00+00


In [152]:
non_human_fires = pd.DataFrame(non_human_fires)

In [153]:
non_human_fires[['temp', 'humidity', 'precip', 'windspeed', 'solarradiation']] = ''

In [154]:
for index, row in non_human_fires.iterrows():
    fire_date = row["FireDiscoveryDateTime"].split(" ")[0].replace("/","-")
    lng, lat = str(row["Y"]), str(row["X"])
    
    url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/" \
        + lng + "," + lat + "/" + fire_date + "/" + fire_date \
        + "?unitGroup=us&include=days&key=N34H8P28WBZH5ADUJAVME467A&contentType=json"
    response = requests.request("GET", url)
    if response.status_code!=200:
        print(index, 'Unexpected Status code: ', response.status_code, response.text)
        break
    else:
        # print(response.text)
        wd = response.json()
        wd = wd['days'][0]
        non_human_fires.at[index, 'temp'] = wd['temp'] if 'temp' in wd else np.nan
        non_human_fires.at[index, 'humidity'] = wd['humidity'] if 'humidity' in wd else np.nan
        non_human_fires.at[index, 'precip'] = wd['precip'] if 'precip' in wd else np.nan
        non_human_fires.at[index, 'windspeed'] = wd['windspeed'] if 'windspeed' in wd else np.nan
        non_human_fires.at[index, 'solarradiation'] = wd['solarradiation'] if 'solarradiation' in wd else np.nan
        

In [155]:
non_human_fires = non_human_fires.drop('FireCause', axis=1)

In [156]:
non_human_fires['Date'] = pd.to_datetime(non_human_fires['FireDiscoveryDateTime']).astype(int) / 10**12
non_human_fires = non_human_fires.drop('FireDiscoveryDateTime', axis=1)

In [161]:
non_human_fires = non_human_fires[['Y', 'X', 'temp', 'Date', 'precip', 'humidity', 'solarradiation', 'windspeed']]
loaded_model = None
with open('wildfire_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

In [162]:
non_human_fires

Unnamed: 0,Y,X,temp,Date,precip,humidity,solarradiation,windspeed
0,33.949725,-118.170692,52.7,1677742.354,0.0,50.7,229.9,9.5
1,33.938963,-117.251002,44.9,1677745.421,0.0,64.9,224.5,8.2
2,33.201946,-79.870403,68.8,1677761.237,0.019,82.0,74.3,11.7
3,33.935007,-82.398564,66.4,1677761.522,0.044,83.8,25.4,10.8
5,26.306605,-80.888143,76.1,1677764.351,0.0,68.9,269.3,14.3
10,33.923195,-118.209772,53.2,1677774.405,0.0,49.2,231.3,9.9
11,33.516726,-85.846925,62.7,1677774.085,0.786,86.1,15.1,13.7
12,33.521136,-85.879915,62.6,1677775.803,0.807,86.4,16.9,13.7
13,31.053617,-89.411117,76.5,1677739.5,0.009,77.0,158.8,16.1
14,36.649937,-92.868107,48.8,1677771.3,1.368,68.7,77.8,12.7


In [183]:
import csv

with open('/home/revanth/Yolo/output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Y', 'X', 'prediction'])
    for index, row in non_human_fires.iterrows():
        incident = row.to_frame().T
        prediction = loaded_model.predict(incident)
        prediction_prob = loaded_model.predict_proba(incident)
        print("data prediction of index:", index, "is: ", prediction, prediction_prob)
        # print(row['Y'],row['X'])
        writer.writerow([row['Y'],row['X'], int(prediction_prob[0][1] * 10)])
        # print()

data prediction of index: 0 is:  [ True] [[0.01 0.99]]
data prediction of index: 1 is:  [ True] [[0.01 0.99]]
data prediction of index: 2 is:  [ True] [[0.17 0.83]]
data prediction of index: 3 is:  [ True] [[0.4 0.6]]
data prediction of index: 5 is:  [ True] [[0.14 0.86]]
data prediction of index: 10 is:  [ True] [[0. 1.]]
data prediction of index: 11 is:  [False] [[0.55 0.45]]
data prediction of index: 12 is:  [False] [[0.55 0.45]]
data prediction of index: 13 is:  [ True] [[0.33 0.67]]
data prediction of index: 14 is:  [False] [[0.52 0.48]]
data prediction of index: 16 is:  [False] [[0.56 0.44]]
data prediction of index: 17 is:  [ True] [[0. 1.]]
data prediction of index: 18 is:  [ True] [[0.4 0.6]]
data prediction of index: 19 is:  [ True] [[0.46 0.54]]
data prediction of index: 20 is:  [ True] [[0. 1.]]
data prediction of index: 21 is:  [False] [[0.75 0.25]]
data prediction of index: 22 is:  [ True] [[0.02 0.98]]
data prediction of index: 23 is:  [False] [[0.66 0.34]]
data predicti

In [149]:
# Load the model and make predictions on new data inputs

# new_data = pd.DataFrame([[34.06, -118.23, 75, '2023-03-03', 0.01, 65, 100, 5]], columns=X.columns)



New data prediction: [False]


Feature names must be in the same order as they were in fit.



In [27]:
data.at[0,'POOState']

'US-CA'