<a href="https://colab.research.google.com/github/sarwar-khan/California-Wildfire-Damage-/blob/main/Untitled49.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.colors as colors
# Make "plotly_white" the default template for figures created after this point.
pio.templates.default = "plotly_white"

In [35]:
# Source CSV hosted in the project's GitHub repository (raw file URL).
url = (
    "https://raw.githubusercontent.com/sarwar-khan/California-Wildfire-Damage-"
    "/refs/heads/main/California%20Wildfire%20Damage.csv"
)

# Read the whole file into the working frame and preview the first rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head()

Unnamed: 0,Incident_ID,Date,Location,Area_Burned (Acres),Homes_Destroyed,Businesses_Destroyed,Vehicles_Damaged,Injuries,Fatalities,Estimated_Financial_Loss (Million $),Cause
0,INC1000,2020-11-22,Sonoma County,14048,763,474,235,70,19,2270.57,Lightning
1,INC1001,2021-09-23,Sonoma County,33667,1633,4,263,100,2,1381.14,Lightning
2,INC1002,2022-02-10,Shasta County,26394,915,291,31,50,6,2421.96,Human Activity
3,INC1003,2021-05-17,Sonoma County,20004,1220,128,34,28,0,3964.16,Unknown
4,INC1004,2021-09-22,Sonoma County,40320,794,469,147,0,15,1800.09,Unknown


In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(100, 11)

In [6]:
# Column names, non-null counts and dtypes, as a quick schema check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Incident_ID                           100 non-null    object 
 1   Date                                  100 non-null    object 
 2   Location                              100 non-null    object 
 3   Area_Burned (Acres)                   100 non-null    int64  
 4   Homes_Destroyed                       100 non-null    int64  
 5   Businesses_Destroyed                  100 non-null    int64  
 6   Vehicles_Damaged                      100 non-null    int64  
 7   Injuries                              100 non-null    int64  
 8   Fatalities                            100 non-null    int64  
 9   Estimated_Financial_Loss (Million $)  100 non-null    float64
 10  Cause                                 100 non-null    object 
dtypes: float64(1), int64

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Area_Burned (Acres),Homes_Destroyed,Businesses_Destroyed,Vehicles_Damaged,Injuries,Fatalities,Estimated_Financial_Loss (Million $)
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,26531.46,941.89,251.57,150.33,42.04,9.93,2396.0993
std,14282.123824,543.019967,138.317761,88.471197,29.201467,5.682251,1430.439422
min,357.0,18.0,4.0,5.0,0.0,0.0,52.59
25%,15916.25,501.0,134.75,70.75,16.0,5.0,1175.195
50%,25618.0,908.5,256.5,150.5,37.0,10.0,2408.53
75%,39775.0,1401.75,371.0,229.75,60.0,14.25,3662.11
max,49653.0,1968.0,493.0,300.0,100.0,20.0,4866.99


In [7]:
# Number of missing values in each column.
df.isna().sum()

Unnamed: 0,0
Incident_ID,0
Date,0
Location,0
Area_Burned (Acres),0
Homes_Destroyed,0
Businesses_Destroyed,0
Vehicles_Damaged,0
Injuries,0
Fatalities,0
Estimated_Financial_Loss (Million $),0


In [37]:
# Parse the date strings once; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Derive calendar year and month columns from the parsed dates.
incident_dates = df['Date'].dt
df['Year'] = incident_dates.year
df['Month'] = incident_dates.month

# Show one row to confirm the new columns are present.
df.head(1)

Unnamed: 0,Incident_ID,Date,Location,Area_Burned (Acres),Homes_Destroyed,Businesses_Destroyed,Vehicles_Damaged,Injuries,Fatalities,Estimated_Financial_Loss (Million $),Cause,Year,Month
0,INC1000,2020-11-22,Sonoma County,14048,763,474,235,70,19,2270.57,Lightning,2020,11


# **Trends in wildfires over the years**

In [28]:
# One row per year holding the number of recorded incidents in that year.
wildfire_trend = (
    df.groupby('Year')
      .size()
      .reset_index(name='Wildfire_Count')
)

# Bar chart of yearly incident counts; text_auto prints the count on each bar.
fig = px.bar(
    data_frame=wildfire_trend,
    x='Year', y='Wildfire_Count',
    text_auto=True,
    labels={'Wildfire_Count': 'Number of Wildfires'},
    title="California Wildfires Over the Years",
)
fig

In [25]:
# Largest value in the derived Year column, i.e. the latest year in the data.
df['Year'].max()

2023

# **Area burned over time**

In [30]:
# Total acres burned per year, with Year kept as a regular column.
area_burned_trend = (
    df.groupby('Year')['Area_Burned (Acres)']
      .sum()
      .reset_index()
)

# Filled area chart of the yearly totals, joined with straight line segments.
fig = px.area(
    data_frame=area_burned_trend,
    x='Year', y='Area_Burned (Acres)',
    line_shape='linear',
    labels={'Area_Burned (Acres)': 'Acres Burned'},
    title="Total Area Burned by Wildfires Each Year",
)

fig


# **Analyze causes of wildfires**

In [34]:
# Frequency of each recorded cause as a two-column frame: Cause, Count.
cause_count = (
    df['Cause']
    .value_counts()
    .rename_axis('Cause')
    .reset_index(name='Count')
)

# Pie chart showing each cause's share of all incidents.
fig = px.pie(
    data_frame=cause_count,
    names='Cause',
    values='Count',
    title="Common Causes of Wildfires",
)
fig.show()

# **Financial impact of wildfires**

In [43]:
# Sum of estimated financial loss (million $) per year, Year kept as a column.
financial_loss_trend = df.groupby('Year', as_index=False)[
    'Estimated_Financial_Loss (Million $)'
].sum()

# Line chart of the yearly totals with a marker on each data point.
fig = px.line(
    data_frame=financial_loss_trend,
    x='Year',
    y='Estimated_Financial_Loss (Million $)',
    markers=True,
    title="Financial Loss Due to Wildfires Over the Years",
)
fig.show()


# **Human impact analysis (Injuries & Fatalities)**

In [50]:
# Yearly totals of injuries and fatalities, with Year as a regular column.
human_impact = (
    df.groupby('Year')[['Injuries', 'Fatalities']]
      .agg('sum')
      .reset_index()
)

In [61]:
# Grouped bar chart comparing yearly injuries and fatalities side by side.
# Passing a list for `y` makes plotly express melt the frame into
# 'variable'/'value' columns, which the `labels` mapping renames for display.
#
# Colors must be valid CSS color keywords: use 'lightblue' (no space).
# 'light blue' is not a CSS named color, so the browser would not render the
# Injuries bars in the intended color.
fig = px.bar(human_impact, x='Year', y=['Injuries', 'Fatalities'],
             title="Human Impact of Wildfires (Injuries & Fatalities)",
             barmode='group',
             labels={'value': 'Count', 'variable': 'Impact Type'},
             color_discrete_map={'Injuries': 'lightblue', 'Fatalities': 'orange'})

# Explicit axis titles, so the y axis reads "Count" rather than the melted column name.
fig.update_layout(xaxis_title="Year", yaxis_title="Count", template="plotly_white")
fig.show()


# **Most affected locations**

In [56]:
# Total acres burned per location across all incidents.
location_damage = df.groupby('Location')['Area_Burned (Acres)'].sum().reset_index()

# Keep the (up to) ten locations with the largest burned area, largest first.
# drop=True discards the old positional index; a plain reset_index() would
# add it to the frame as a stray 'index' column.
top_locations = (
    location_damage
    .sort_values(by='Area_Burned (Acres)', ascending=False)
    .head(10)
    .reset_index(drop=True)
)

# Bar chart of the ranked locations.
fig = px.bar(top_locations,
             x='Location',
             y='Area_Burned (Acres)',
             title="Top 10 Locations with Most Area Burned",
             labels={'Area_Burned (Acres)': 'Acres Burned'})
fig.show()