In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go

import plotly
plotly.offline.init_notebook_mode(connected=True)

#Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<h1 style="font-size:180%; color:DeepSkyBlue;"><i><b>Climate Change AI (CCAI)</b></i></h1>

"Climate Change AI is a global initiative to catalyze impactful work at the intersection of climate change and machine learning."

https://www.climatechange.ai/

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQMrLAFpfIBOGSb0V8vpbgu87RJp3jAbLLNMg&usqp=CAU)aionics.io

In [None]:
# Load the CCAI dataset from the competition input folder and preview its last two rows.
df = pd.read_csv(
    "/kaggle/input/phase-ii-widsdatathon2022/ccai/ccai/data.csv",
    sep=',',
    encoding='utf8',
)
df.tail(n=2)

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
from shapely.geometry import Point
import geopandas as gpd
import seaborn as sns
import contextily
import matplotlib.pyplot as plt
import warnings
from scipy import stats 
import math


from shapely.geometry import Point, LineString

import folium
from folium import Marker, GeoJson
from folium.plugins import MarkerCluster, HeatMap

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRkN2B8QEfVWJ87knJHcEhCp8qLka9A8qNOnw&usqp=CAU)youtube.com

In [None]:
import fiona
import geopandas
from fiona.transform import transform_geom

#Spoiler alert, we don't need Fiona here:) 

![](https://images-na.ssl-images-amazon.com/images/I/51E6098ZR4L.jpg)amazon.com.br

In [None]:
# Non-object columns that contain at least one missing value.
# The threshold is `> 0` (not `> 1`) so that a column with exactly one NaN is
# not silently left out of the list; this also matches the `> 0` test used for
# the categorical columns.
numerical_nan = [
    feature
    for feature in df.columns
    if df[feature].dtypes != 'O' and df[feature].isna().sum() > 0
]
numerical_nan

In [None]:
# Missing-value count for each column listed in `numerical_nan`.
df.loc[:, numerical_nan].isna().sum()

In [None]:
## Replacing the numerical missing values

for feature in numerical_nan:
    # The median is used as the fill value because the columns contain outliers.
    median_value = df[feature].median()

    # Assign the filled column back to `df` instead of calling
    # `df[feature].fillna(..., inplace=True)`: the in-place call operates on the
    # intermediate object returned by `df[feature]`, which pandas warns about
    # and which leaves `df` unchanged when copy-on-write is active.
    df[feature] = df[feature].fillna(median_value)

# Check: remaining missing values in the imputed columns.
df[numerical_nan].isnull().sum()

In [None]:
# Object-dtype columns that contain at least one missing value.
categorical_nan = [
    col
    for col in df.columns
    if df[col].dtypes == 'O' and df[col].isna().sum() > 0
]
print(categorical_nan)

In [None]:
# Fill missing entries of every categorical column with the literal string 'None'.
for col in categorical_nan:
    filled = df[col].fillna(value='None')
    df[col] = filled

In [None]:
# Missing-value count for each column listed in `categorical_nan`.
df.loc[:, categorical_nan].isna().sum()

#Add columns with Latitude/Longitude. Thanks to Leonie. 

In [None]:
#Code by Leonie https://www.kaggle.com/code/iamleonie/wids-datathon-2022-phase-ii-climate-change-ai#Geographical

# Each 'coords_eobs' value is parsed as text: the first and last characters are
# dropped and the remainder is split on commas. The first number is stored as
# latitude and the second as longitude.
# Every value is parsed once (not once per output column), and splitting on ','
# instead of ', ' tolerates any spacing after the comma because float() ignores
# surrounding whitespace.
_coords = df['coords_eobs'].apply(
    lambda raw: [float(part) for part in raw[1:-1].split(',')[:2]]
)
df['coords_eobs_lat'] = _coords.apply(lambda pair: pair[0])
df['coords_eobs_long'] = _coords.apply(lambda pair: pair[1])

#Just checked to read the new columns

In [None]:
# Preview the first two rows, including the new latitude/longitude columns.
df.head(n=2)

GeoPandas 0.10.2+0.g04d377f.dirty

"GeoPandas is an open source project to make working with geospatial data in python easier. GeoPandas extends the datatypes used by pandas to allow spatial operations on geometric types. Geometric operations are performed by shapely. Geopandas further depends on fiona for file access and matplotlib for plotting."

https://geopandas.org/en/stable/

In [None]:
!pip install geopandas

In [None]:
!pip install contextily

#Creating a CRS point through the "latitude" and "longitude" columns

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# One shapely Point per row, built in (longitude, latitude) = (x, y) order.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df['coords_eobs_long'], df['coords_eobs_lat'])
]

#EPSG codes

https://www.kaggle.com/code/mpwolke/geopandas-contextily-maps

For reference, a few very common projections and their EPSG codes:

WGS84 Latitude/Longitude: "EPSG:4326"

UTM Zone 33N (northern hemisphere): "EPSG:32633"

UTM Zone 33S (southern hemisphere): "EPSG:32733"

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Wrap the table in a GeoDataFrame, attaching the points and declaring them as EPSG:4326.
# don't change geometry
fr = gpd.GeoDataFrame(
    df,
    geometry=geometry,
    crs='EPSG:4326',
)

#Reprojecting the GeoDataFrame from EPSG:4326 (latitude/longitude) to EPSG:3857 (Web Mercator), the Coordinate Reference System (CRS) that contextily basemaps use by default

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Reproject the GeoDataFrame to EPSG:3857.
fr = fr.to_crs(epsg=3857)

In [None]:
# Preview the last two rows of the reprojected GeoDataFrame.
fr.tail(n=2)

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Rows of the GeoDataFrame whose city name is AIX-EN-PROVENCE.
aix = fr[fr['city_name'].eq('AIX-EN-PROVENCE')]
aix.head(n=2)

<h1><span class="label label-default" style="background-color:white;border-radius:100px 100px; font-weight: bold; font-family:Garamond; font-size:20px; color:#DC143C; padding:10px">Climate Change and AI France</span></h1><br>

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Map of every point in `fr`, drawn in red over a contextily basemap.
ax = fr.plot(color='red', markersize=1, figsize=(15, 10))
contextily.add_basemap(ax)
ax.set_axis_off()
ax.set_title('Climate Change AI France')

plt.show()

<h1><span class="label label-default" style="background-color:white;border-radius:100px 100px; font-weight: bold; font-family:Garamond; font-size:20px; color:#DC143C; padding:10px">Climate Change Aix-en-Provence</span></h1><br>

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Map of the Aix-en-Provence subset, drawn in blue over a contextily basemap.
ax = aix.plot(color='blue', markersize=1, figsize=(15, 10))
contextily.add_basemap(ax)
ax.set_axis_off()
ax.set_title('Climate Change AI - Aix-en-Provence')

plt.show()

#Elle est petite, cette carte. N'est-ce pas ?

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Rows of the GeoDataFrame whose city name is TOULOUSE.
tou = fr[fr['city_name'].eq('TOULOUSE')]
tou.head(n=2)

<h1><span class="label label-default" style="background-color:white;border-radius:100px 100px; font-weight: bold; font-family:Garamond; font-size:20px; color:#DC143C; padding:10px">Climate Change and AI Toulouse</span></h1><br>

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Map of the Toulouse subset, drawn in red over a contextily basemap.
ax = tou.plot(color='red', markersize=1, figsize=(15, 10))
contextily.add_basemap(ax)
ax.set_axis_off()
ax.set_title('Climate Change AI - Toulouse')

plt.show()

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Rows of the GeoDataFrame whose city name is RENNES.
ren = fr[fr['city_name'].eq('RENNES')]
ren.head(n=2)

<h1><span class="label label-default" style="background-color:white;border-radius:100px 100px; font-weight: bold; font-family:Garamond; font-size:20px; color:#DC143C; padding:10px">Climate Change and AI Rennes</span></h1><br>

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Map of the Rennes subset, drawn in red over a contextily basemap.
ax = ren.plot(color='red', markersize=1, figsize=(15, 10))
contextily.add_basemap(ax)
ax.set_axis_off()
ax.set_title('Climate Change AI - Rennes')

plt.show()

<h1><span class="label label-default" style="background-color:white;border-radius:100px 100px; font-weight: bold; font-family:Garamond; font-size:20px; color:#DC143C; padding:10px">Climate Change and AI France</span></h1><br>

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Two separate maps of `fr`: one coloured by the 'type' column, one by 'floors'.
# Each `fr.plot` call creates its own figure, so the title is set on each axes;
# a single `plt.title` call would only label the most recently created figure
# and leave the first map untitled.
ax1 = fr.plot(figsize=(12, 7), column='type', markersize=1, legend=True)
ax2 = fr.plot(figsize=(12, 7), column='floors', markersize=1, legend=True)

for ax in (ax1, ax2):
    contextily.add_basemap(ax)
    ax.set_axis_off()
    ax.set_title('Climate Change AI France')

plt.show()

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Two separate maps of the Rennes subset: one coloured by the 'type' column,
# one by 'consumption'.
# Each `ren.plot` call creates its own figure, so the title is set on each axes;
# a single `plt.title` call would only label the most recently created figure
# and leave the first map untitled.
ax1 = ren.plot(figsize=(12, 7), column='type', markersize=1, legend=True)
ax2 = ren.plot(figsize=(12, 7), column='consumption', markersize=1, legend=True)

for ax in (ax1, ax2):
    contextily.add_basemap(ax)
    ax.set_axis_off()
    ax.set_title('Climate Change AI Rennes')

plt.show()

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Keep only the rows whose longitude and latitude are both non-zero.
mask1 = fr['coords_eobs_long'] != 0
mask2 = fr['coords_eobs_lat'] != 0
located = fr[mask1 & mask2]  # filter once instead of once per column

x = located['coords_eobs_long']
y = located['coords_eobs_lat']
z = located['consumption']  # the source snippet used frp (fire radiative power) here

# Scatter plot of longitude vs. latitude with marker area proportional to consumption / 10.
plt.figure(figsize=(10,10))
plt.title('Climate Change AI France')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# The returned collection is not bound to `ax`: it is not an Axes object.
plt.scatter(x, y, s=z/10, alpha=1)

# `plt.show;` only referenced the function without calling it.
plt.show()

#Dès Le XIIIe siècle?? Since 1300?! (Till 2017)

In [None]:
#Code by Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

# Map of every point in `fr`, coloured by the 'age' column, over a contextily basemap.
ax = fr.plot(column='age', legend=True, markersize=1, figsize=(15, 10))
contextily.add_basemap(ax)
ax.set_axis_off()
ax.set_title('Climate Change AI France')

plt.show()

#The charts below were supposed to be maps, which would require a GeoPandas GeoDataFrame (e.g. loaded from a GeoJSON file). Here, however, we are working with a CSV file loaded into a plain pandas DataFrame.

"Plotting geopandas dataframe"  Now we have just clumsy charts. However, I'll save these snippets for the next time.

In [None]:
#Code by Devakumar K.P. https://www.kaggle.com/code/imdevskp/geopandas

# `df` was loaded with pd.read_csv, so this is the pandas plot of the table drawn on a 10x5 figure.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
df.plot(color='midnightblue', ax=ax)
plt.show()

In [None]:
# NOTE(review): `df` was created with pd.read_csv; this attribute access presumably
# relies on a 'geometry' column being visible on `df` after the GeoDataFrame was
# built from it — confirm on a fresh kernel, otherwise inspect `fr.geometry`.
type(df.geometry)

In [None]:
# First five entries of the geometry column.
# NOTE(review): presumably requires `df` to expose a 'geometry' column — confirm
# on a fresh kernel, otherwise use `fr.geometry[:5]`.
df.geometry[:5]

In [None]:
# Number of rows per city name, most frequent first.
df.loc[:, "city_name"].value_counts()

In [None]:
# Rows for ARLES, followed by the default plot of that subset.
arles = df.loc[df['city_name'].eq('ARLES')]
arles.plot();

In [None]:
# Rows for NICE, followed by the default plot of that subset.
nice = df.loc[df['city_name'].eq('NICE')]
nice.plot();

In [None]:
# Rows for the 15th arrondissement of Paris, re-indexed from zero.
in_paris_15 = df['city_name'].eq('PARIS 15E ARRONDISSEMENT')
paris = df.loc[in_paris_15].reset_index(drop=True)
paris.head(n=2)

In [None]:
# Rows for the 13th arrondissement of Paris (this rebinds `paris`), then the default plot.
paris = df.loc[df['city_name'].eq('PARIS 13E ARRONDISSEMENT')]
paris.plot();

#Acknowledgements:

Rafael Herrero https://www.kaggle.com/rafaelherrero/2020-brazil-s-wildfire-analysis/notebook

Leonie https://www.kaggle.com/code/iamleonie/wids-datathon-2022-phase-ii-climate-change-ai#Geographical

Devakumar K.P. https://www.kaggle.com/code/imdevskp/geopandas