In [1]:
import pandas as pd

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pd.options.plotting.backend = 'plotly'


## Syn1 Final Project 

### Name: Coco Sun

##### In this project, I am going to visually present the climate change through analysis of datasets. Inspired from articles such as 101: Breaking Down Greenhouse Gases that provide statistics of global warming, I want to explore the dataset myself and also to see if there is any other interesting facts about climate crisis. 

##### I would primarily focus on the exmaination of cliamte change on global surface temperature, and then siwtch to nearer areas in California about the frequency of wildfires throughout the years. 
 

The surface temperature is a measure of temperature from the gound. The rising of surface temperature could cause change in sea level and rainfall patterns. It could also has harmful effects on human acitivities such as the farming system. 

In [2]:
avg_temp = pd.read_csv('data/average-monthly-surface-temperature.csv')
avg_temp = avg_temp.rename(columns={'Entity':'Country', 'year':'Year', 'Average surface temperature':'Daily Avg Surface Temp', 'Average surface temperature.1':'Monthly Avg Surface Temp'}).drop(labels=['Day', 'Daily Avg Surface Temp'], axis=1).drop_duplicates()
avg_temp.head()

# source: https://data.worldbank.org/indicator, https://www.kaggle.com/datasets/samithsachidanandan/average-monthly-surface-temperature-1940-2024

Unnamed: 0,Country,Code,Year,Monthly Avg Surface Temp
0,Afghanistan,AFG,1940,11.327695
12,Afghanistan,AFG,1941,13.324756
24,Afghanistan,AFG,1942,12.885448
36,Afghanistan,AFG,1943,11.524769
48,Afghanistan,AFG,1944,12.143665


I am going to use a choropleth plot to show the change of average surface temperature in each country per year. 

In [3]:
min_temp = avg_temp['Monthly Avg Surface Temp'].min()
max_temp = avg_temp['Monthly Avg Surface Temp'].max()

#### 1. Choropleth_map for surface temp

In [4]:
fig = px.choropleth(avg_temp.dropna(), 
              locations='Country', 
              locationmode='country names',
              color='Monthly Avg Surface Temp', 
              color_continuous_scale='reds',
              animation_frame='Year',
              
              range_color=[min_temp, max_temp],
              title='Global Temperature Trends')
# to show the plot here: 
fig.update_traces(colorscale=[
    [0.0, 'white'],
    [0.25, 'lightpink'],
    [0.5, 'pink'],
    [0.75, 'darkorange'],
    [1.0, 'red']
], 
colorbar=dict(title='Monthly Avg Surface Temp'))

fig.update_layout(height=600, width=800)

fig.show()


In [5]:
# to show the plot in an html file:
plotly.offline.plot(fig, filename= 'C: \fig.html', auto_open=True)

'C: \x0cig.html'

In [6]:
# change the fig to html to be included in the report
fig.write_html('choropleth_avg_temp.html', include_plotlyjs='cdn')

From the choropleth, we can see that there is a gradual increase of surface temperature in the past 40 years. To make it clearer, we can directly plot the change of difference in the plot below.

In [15]:
iso_alpha = avg_temp[['Country', 'Code']].drop_duplicates()
iso_alpha

pivot_temp = avg_temp.pivot_table(index='Country', columns='Year', values='Monthly Avg Surface Temp')

temp_diff = pd.DataFrame(pivot_temp[2024] - pivot_temp[1940]).reset_index()

temp_diff.columns = ['Country', 'temp_diff_1940_2024']
temp_diff = temp_diff.merge(iso_alpha, on='Country')
temp_diff.head()

Unnamed: 0,Country,temp_diff_1940_2024,Code
0,Afghanistan,2.165975,AFG
1,Albania,3.788709,ALB
2,Algeria,4.042503,DZA
3,American Samoa,0.883987,ASM
4,Andorra,3.382685,AND


In [16]:
temp_diff['temp_diff_1940_2024'].min()

np.float64(0.23721699999999757)

In [17]:
temp_diff['temp_diff_1940_2024'].max()

np.float64(6.2212965)

In [20]:
temp_diff.head()

Unnamed: 0,Country,temp_diff_1940_2024,Code
0,Afghanistan,2.165975,AFG
1,Albania,3.788709,ALB
2,Algeria,4.042503,DZA
3,American Samoa,0.883987,ASM
4,Andorra,3.382685,AND


#### 2. 3D Scatter Plot for Surface Temperature Difference

In [21]:
temp_diff_fig = px.scatter_geo(temp_diff, 
               locations='Code',
               projection='orthographic',
               color='temp_diff_1940_2024',
               opacity=0.9,
               hover_name='Country',
               hover_data=['temp_diff_1940_2024'],
               color_continuous_scale='reds',
               title='Surface Temperature Difference 1940-2024')


# to show the plot here: 
temp_diff_fig.update_layout(height=600, width=800)
temp_diff_fig.show()

In [22]:
# to show the plot in html file:
plotly.offline.plot(temp_diff_fig, filename= 'C: \temp_diff_fig.html', auto_open=True)

'C: \temp_diff_fig.html'

In [23]:
# change the fig to html to be included in the report
temp_diff_fig.write_html('scatter3D_temp_diff.html', include_plotlyjs='cdn')

Then I am trying to examine the frequency of wildfires happened in California. The most recent dataset I could find contain information from 2014-2023. 

In [20]:
wildfires_cali = pd.read_csv('data/California Wildfire Damage.csv')
wildfires_cali['Location'] = wildfires_cali['Location'].str.replace('County', "").str.strip()
wildfires_cali.head()

# Source: https://www.kaggle.com/datasets/vivekattri/california-wildfire-damage-2014-feb2025


Unnamed: 0,Incident_ID,Date,Location,Area_Burned (Acres),Homes_Destroyed,Businesses_Destroyed,Vehicles_Damaged,Injuries,Fatalities,Estimated_Financial_Loss (Million $),Cause
0,INC1000,2020-11-22,Sonoma,14048,763,474,235,70,19,2270.57,Lightning
1,INC1001,2021-09-23,Sonoma,33667,1633,4,263,100,2,1381.14,Lightning
2,INC1002,2022-02-10,Shasta,26394,915,291,31,50,6,2421.96,Human Activity
3,INC1003,2021-05-17,Sonoma,20004,1220,128,34,28,0,3964.16,Unknown
4,INC1004,2021-09-22,Sonoma,40320,794,469,147,0,15,1800.09,Unknown


In [13]:
cali_lat_lon = pd.read_csv('data/cal_cities_lat_long.csv')
cali_lat_lon.head()

# source: https://www.kaggle.com/datasets/camnugent/california-housing-feature-engineering

Unnamed: 0,Name,Latitude,Longitude
0,Adelanto,34.582769,-117.409214
1,Agoura Hills,34.153339,-118.761675
2,Alameda,37.765206,-122.241636
3,Albany,37.886869,-122.297747
4,Alhambra,34.095286,-118.127014


In [14]:
wildfires_cali = wildfires_cali.merge(cali_lat_lon, left_on='Location', right_on='Name', how='left').dropna().sort_values(['Date'])
wildfires_cali['Year'] = wildfires_cali['Date'].str.split('-').str[0]
wildfires_cali.head()

Unnamed: 0,Incident_ID,Date,Location,Area_Burned (Acres),Homes_Destroyed,Businesses_Destroyed,Vehicles_Damaged,Injuries,Fatalities,Estimated_Financial_Loss (Million $),Cause,Name,Latitude,Longitude,Year
58,INC1058,2014-02-03,Santa Barbara,47674,1186,438,71,54,11,2580.87,Unknown,Santa Barbara,34.420831,-119.698189,2014
61,INC1061,2014-03-28,Los Angeles,11162,544,223,196,100,8,85.57,Unknown,Los Angeles,34.052233,-118.243686,2014
17,INC1017,2014-04-16,Sonoma,25094,1014,288,269,30,12,268.91,Human Activity,Sonoma,38.291858,-122.458036,2014
45,INC1045,2014-05-13,Sonoma,21703,735,279,294,3,1,1305.34,Human Activity,Sonoma,38.291858,-122.458036,2014
47,INC1047,2014-05-16,San Diego,28661,276,313,89,7,10,3485.12,Unknown,San Diego,32.715328,-117.157256,2014


#### 3. Scatter Map for California Wildfires

In [15]:
fig_cali = px.scatter_mapbox(wildfires_cali, lat='Latitude', lon='Longitude', hover_name='Cause',animation_frame='Year', mapbox_style='open-street-map', size = 'Area_Burned (Acres)', zoom=4, title='Wildfires in California 2014-2023')


# to show the plot here: 
fig_cali.update_layout(height=600, width=800)
fig_cali.show()


*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [16]:
plotly.offline.plot(fig_cali, filename= 'C: \fig_cali.html', auto_open=True)

'C: \x0cig_cali.html'

In [96]:
fig_cali.write_html('wildfires_cali.html', include_plotlyjs='cdn')

The various plots that I show above is also an indication that the advancing technology do play a role in climate crisis. In addition to the function of presenting the progress of global warming, generative AI could also build up models to predict the temperature changing or next wildfire in the area. But predictions does not guarantee certainty, and individual efforts is necessary to prevent the proceeding of global warming. 

### Part 3: Progress

In [17]:
headlines = pd.read_csv('data/climate_headlines_sentiment.csv')
headlines.head()

FileNotFoundError: [Errno 2] No such file or directory: 'data/climate_headlines_sentiment.csv'

In [15]:
headlines['Sentiment'].value_counts().sort_index() / len(headlines)

Sentiment
-1.0    0.268555
-0.5    0.148438
 0.0    0.208008
 0.5    0.225586
 1.0    0.148438
Name: count, dtype: float64

In [16]:
opinions = pd.read_csv('data/reddit_opinion_climate_change.csv')
opinions


Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,comment_id,score,self_text,subreddit,created_time,post_id,author_name,controversiality,ups,downs,...,user_link_karma,user_comment_karma,user_total_karma,post_score,post_self_text,post_title,post_upvote_ratio,post_thumbs_ups,post_total_awards_received,post_created_time
0,mevbc5m,1,Yeah that's possible but that also supports th...,politics,2025-02-26 11:36:39,1iy91zh,jimmydean885,0,1,0,...,494.0,292410.0,293520.0,13,,Ukraine and US agree to terms of rare earth mi...,0.67,13,0,2025-02-25 23:21:50
1,mevb9vd,1,"Hold on, destroying progress???? What progre...",climate,2025-02-26 11:36:06,1iyl91d,yeltneb77,0,1,0,...,1.0,966.0,967.0,8,,The US is destroying climate progress. Here’s ...,1.00,8,0,2025-02-26 11:18:28
2,mevb93a,1,Fossil fuels are the reason energy prices are ...,unitedkingdom,2025-02-26 11:35:55,1iyibzg,playervlife,0,1,0,...,829.0,23933.0,24863.0,5,,UK urged to act now on net zero – and skip two...,0.52,5,0,2025-02-26 07:46:11
3,mevb33e,1,I saw this best described in a YouTube short t...,changemyview,2025-02-26 11:34:31,1iwtph3,KingOfTheJellies,0,1,0,...,683.0,38854.0,39861.0,573,Let’s cut the nostalgia. My dad worked a summe...,"CMV: Boomers had it 10x easier than Gen Z, and...",0.88,573,0,2025-02-24 04:31:09
4,mevb2ku,1,But this whole thing is corporate gaslighting ...,unitedkingdom,2025-02-26 11:34:24,1iyibzg,DigitalRoman486,0,1,0,...,6394.0,141970.0,148805.0,5,,UK urged to act now on net zero – and skip two...,0.52,5,0,2025-02-26 07:46:11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1023233,dzyrmqu,2,"Yes, I remember the hype that ended in nothing...",climate_science,2018-06-01 22:18:21,8ntlug,MissPinga,0,2,0,...,301.0,3516.0,3817.0,19,If algae is responsible for cleaning our air m...,Why dont we farm algae on a large scale to red...,0.96,19,0,2018-06-01 17:09:16
1023234,dzyf2d7,2,There were a [lot of people who lost a lot of ...,climate_science,2018-06-01 18:58:56,8ntlug,silence7,0,2,0,...,1184736.0,259884.0,1469188.0,19,If algae is responsible for cleaning our air m...,Why dont we farm algae on a large scale to red...,0.96,19,0,2018-06-01 17:09:16
1023235,dzyesw9,1,"Well, it can be used as a bio fuel. This is an...",climate_science,2018-06-01 18:55:08,8ntlug,dano1066,0,1,0,...,12834.0,184926.0,198366.0,19,If algae is responsible for cleaning our air m...,Why dont we farm algae on a large scale to red...,0.96,19,0,2018-06-01 17:09:16
1023236,dzybxzq,5,There's no money in algae,climate_science,2018-06-01 18:13:20,8ntlug,ZippymcOswald,0,5,0,...,39918.0,26950.0,67462.0,19,If algae is responsible for cleaning our air m...,Why dont we farm algae on a large scale to red...,0.96,19,0,2018-06-01 17:09:16


#### Citations:


