# Constantin's research notebook for the Smart City project

In [306]:
from zipfile import ZipFile, Path
import pandas as pd
import plotly.express as px
import folium

In [307]:
def get_data(path: str) -> pd.DataFrame:
    '''
    Load the CSV stored inside a zip archive into a DataFrame.

    The archive member is expected to carry the archive's own file name
    without the trailing ``.zip`` (e.g. ``DMR_2020.csv.zip`` contains
    ``DMR_2020.csv``) and to use ``;`` as the field separator.

    Parameters
    ----------
    path : str
        Path to the zip archive; it may include directory components.

    Returns
    -------
    pd.DataFrame
        The parsed content of the CSV member.

    Raises
    ------
    KeyError
        If the archive does not contain the expected member.
    '''
    import os  # local import: the notebook's import cell does not provide it

    # Members are stored under their bare file name, so drop any directory
    # part of the archive path before deriving the member name.
    archive_name = os.path.basename(path)
    # Strip only a trailing ".zip"; cutting at the first ".zip" occurrence
    # would truncate names that contain ".zip" earlier in the string.
    if archive_name.endswith('.zip'):
        member_name = archive_name[:-len('.zip')]
    else:
        member_name = archive_name

    # Parse while both the archive and the member are open; the context
    # managers then close the two handles.
    with ZipFile(path) as myzip, myzip.open(member_name) as data:
        return pd.read_csv(data, sep=';')

In [308]:
# Load the three DMR exports (2021-2022, 2020 and 2019), one frame per archive.
df_2021_2022, df_2020, df_2019 = map(
    get_data,
    ("DMR_2021_2022.csv.zip", "DMR_2020.csv.zip", "DMR_2019.csv.zip"),
)

In [309]:
# Split the comma-separated 'geo_point_2d' text into its two components and
# store them as the (still textual) columns 'X' and 'Y'.
geo_point_parts = df_2021_2022['geo_point_2d'].str.split(',', expand=True)
df_2021_2022[['X', 'Y']] = geo_point_parts


In [310]:
# Remove the 2021-2022 columns that the analysis below does not use.
df_2021_2022 = df_2021_2022.drop(
    columns=[
        'ID DECLARATION',
        'VILLE',
        'ANNEE DECLARATION',
        'MOIS DECLARATION',
        'OUTIL SOURCE',
        'ID_DMR',
        'geo_shape',
        'geo_point_2d',
        'CONSEIL DE QUARTIER',
    ]
)

In [311]:
# Stack the 2019 and 2020 exports (original row labels are kept) and remove
# the columns that the analysis below does not use.
df_2019_2020 = (
    pd.concat([df_2019, df_2020])
    .drop(
        columns=[
            'OBJECTID',
            'ID_DMR',
            'VILLE',
            'ANNEEDECL',
            'MOISDECL',
            'NUMERO',
            'PREFIXE',
            'CONSEILQUARTIER',
        ]
    )
)


In [312]:
# Align the 2019-2020 column names with the 2021-2022 ones so that both
# frames can be stacked into a single table.
df_2019_2020 = df_2019_2020.rename(columns={
    'TYPE': 'TYPE DECLARATION',
    'SOUSTYPE': 'SOUS TYPE DECLARATION',
    'CODE_POSTAL': 'CODE POSTAL',
    'DATEDECL': 'DATE DECLARATION',
    'Y': 'LONGITUDE',
    'X': 'LATITUDE',
})
# NOTE(review): in the merged preview the 'LONGITUDE' column holds values
# around 48.8 and 'LATITUDE' values around 2.3, i.e. the two names look
# swapped for Paris. The map cell passes [LONGITUDE, LATITUDE] to folium, so
# the swap is self-consistent; renaming must be done in every cell at once.
# Presumably the 2019-2020 'Y'/'X' follow the same layout - verify.
df_2021_2022 = df_2021_2022.rename(columns={'X': 'LONGITUDE', 'Y': 'LATITUDE'})

# Row labels of the inputs are kept as they are (no re-indexing).
df_final = pd.concat([df_2021_2022, df_2019_2020])


In [313]:
# Preview the first five rows of the merged frame.
df_final.head(5)

Unnamed: 0,TYPE DECLARATION,SOUS TYPE DECLARATION,ADRESSE,CODE POSTAL,ARRONDISSEMENT,DATE DECLARATION,INTERVENANT,LONGITUDE,LATITUDE
0,Mobiliers urbains,Collecte des déchets:Trilib' dégradé,"18 Rue de Mazagran, 75010 PARIS",75010,10,2021-01-08,TRILIB',48.8707730016144,2.352240998304316
1,Objets abandonnés,Autres objets encombrants abandonnés,"58 Rue de l'Aqueduc, 75010 PARIS",75010,10,2021-01-08,Ramen en tant que prestataire de DansMaRue,48.88284700351721,2.365264406238782
2,Objets abandonnés,Autres objets encombrants abandonnés,"106 Rue des Amandiers, 75020 PARIS",75020,20,2021-01-08,Ramen en tant que prestataire de DansMaRue,48.86777999827195,2.388730996327768
3,"Graffitis, tags, affiches et autocollants","Graffitis sur mur, façade sur rue, pont","14 Rue Carrière-Mainguet, 75011 PARIS",75011,11,2021-01-08,graffitis,48.85659999566048,2.387607803079172
4,Objets abandonnés,Cartons,"1 Square Alfred Capus, 75016 PARIS",75016,16,2021-01-08,DPE-STPP-DT,48.85093700329557,2.259631402809926


In [314]:
# Number of declarations recorded for each arrondissement (district).
arrondissement_count = df_final["ARRONDISSEMENT"].value_counts()

# Plot from an explicit two-column frame. The name pandas gives to a
# value_counts() result differs between versions (the source column name
# before 2.0, "count" from 2.0 on), so referring to the counts by the string
# 'ARRONDISSEMENT' on the Series itself only works on older releases.
arrondissement_frame = pd.DataFrame({
    'index': arrondissement_count.index,
    'ARRONDISSEMENT': arrondissement_count.to_numpy(),
})

fig = px.bar(arrondissement_frame, x='index', y='ARRONDISSEMENT',
             hover_data=['index', 'ARRONDISSEMENT'], color='ARRONDISSEMENT',
             # The y/color values are counts, so they are labelled as anomalies
             # rather than as a district.
             labels={'index': "District number", 'ARRONDISSEMENT': 'Anomalies'},
             title='Number of anomalies by district', height=400)
# Treat district numbers as categories so that every bar gets its own tick.
fig.update_xaxes(type='category')
fig.show()

In [315]:
# Share of the declarations recorded in each arrondissement (district).
# `px` comes from the notebook's import cell; `arrondissement_count` is the
# per-district value_counts() Series computed in the bar-chart cell.
#
# Plot from an explicit two-column frame: the name pandas gives to a
# value_counts() result differs between versions ("count" from 2.0 on), so
# referring to the counts by the string 'ARRONDISSEMENT' on the Series itself
# only works on older releases.
arrondissement_share = pd.DataFrame({
    'index': arrondissement_count.index,
    'ARRONDISSEMENT': arrondissement_count.to_numpy(),
})

fig = px.pie(arrondissement_share, values='ARRONDISSEMENT',
            names='index',
            title='Pie chart of anomalies by district',
            color='ARRONDISSEMENT',
            color_discrete_sequence=px.colors.cyclical.IceFire,
            labels={'index': "Number", 'ARRONDISSEMENT': 'Anomalies'})
# The figure size is set once, here.
fig.update_layout(width=1000,
    height=700,
    legend_title_text='District'
)

fig.show()

In [316]:
# Replace decimal commas with decimal points in the textual coordinates so
# that they can be converted to floats afterwards.
# Only strings are rewritten: missing values and values that are already
# numeric (e.g. when this cell is re-run after the float conversion) are
# passed through unchanged instead of raising AttributeError.
for coordinate_column in ('LONGITUDE', 'LATITUDE'):
    df_final[coordinate_column] = df_final[coordinate_column].map(
        lambda value: value.replace(',', '.') if isinstance(value, str) else value
    )


In [317]:

# Convert both coordinate columns to floats.
for coordinate_column in ('LONGITUDE', 'LATITUDE'):
    df_final[coordinate_column] = df_final[coordinate_column].astype(float)

# Mean position of all declarations; the map cell uses it as its centre.
locations_x, location_y = (
    df_final['LONGITUDE'].mean(),
    df_final['LATITUDE'].mean(),
)

In [318]:
from folium.plugins import MarkerCluster

# NOTE(review): folium expects [latitude, longitude]. The merged preview shows
# that the column named 'LONGITUDE' holds the ~48.8 values, so passing
# [LONGITUDE, LATITUDE] below yields valid Paris positions even though the
# column names look swapped.
# "OpenStreetMap" is used as the base layer: the "Stamen Terrain" tiles are no
# longer available as a built-in folium tile set.
paris = folium.Map(location = [locations_x, location_y], tiles="OpenStreetMap", zoom_start = 13)

# Markers cannot be created for missing coordinates, so drop those rows first.
df_final_located = df_final.dropna(subset=['LONGITUDE', 'LATITUDE'])
# Draw at most 1000 rows (sampling more rows than available raises) with a
# fixed seed so that the map is the same on every run.
df_final_1000 = df_final_located.sample(min(1000, len(df_final_located)), random_state=42)
marker_cluster = MarkerCluster(
    name='1000 clustered icons',
    overlay=True,
    control=False,
    icon_create_function=None
)
for index, location_info in df_final_1000.iterrows():
    marker = folium.Marker([location_info["LONGITUDE"], location_info["LATITUDE"]])
    # HTML shown when the marker is clicked.
    popup='Type: {}<br>Arrondisement: {} <br> Adresse: {} <br>  Date declaration {}'.format(
                                                            location_info["TYPE DECLARATION"], 
                                                            location_info["ARRONDISSEMENT"],
                                                            location_info["ADRESSE"],
                                                            location_info['DATE DECLARATION'])
    folium.Popup(popup, min_width=300, max_width=300).add_to(marker)
    marker_cluster.add_child(marker)

marker_cluster.add_to(paris)

folium.LayerControl().add_to(paris)

# Last expression of the cell: renders the map.
paris

In [407]:
# Merge the variant of this label that carries a trailing space into the
# clean one. The result is assigned back to the column: calling
# replace(..., inplace=True) on the selected column is a chained in-place
# operation that does not update df_final under pandas Copy-on-Write.
df_final['TYPE DECLARATION'] = df_final['TYPE DECLARATION'].replace(
    'Autos, motos, vélos... ', 'Autos, motos, vélos...')

# Number of declarations per type.
df_count_type = df_final['TYPE DECLARATION'].value_counts()

# Plot from an explicit two-column frame: the name pandas gives to a
# value_counts() result differs between versions ("count" from 2.0 on), so
# referring to the counts by the string 'TYPE DECLARATION' on the Series
# itself only works on older releases.
type_frame = pd.DataFrame({
    'index': df_count_type.index,
    'TYPE DECLARATION': df_count_type.to_numpy(),
})

fig = px.histogram(type_frame, y = 'index', x = 'TYPE DECLARATION',
            color='TYPE DECLARATION',
            width=1200, height=500,
            orientation='h',
            labels = {'index': 'Type', 'TYPE DECLARATION': 'Total'},)
fig.show()

In [419]:
# Number of declarations per sub-type, most frequent first.
df_count_sous_type_20 = df_final['SOUS TYPE DECLARATION'].value_counts()
# Skip the most frequent sub-type (per the original author its count is about
# 4000k, which would dwarf the other bars) and keep the next 19 entries.
# .iloc makes the positional meaning of the slice explicit.
df_count_sous_type_20 = df_count_sous_type_20.iloc[1:20]

# Plot from an explicit two-column frame: the name pandas gives to a
# value_counts() result differs between versions ("count" from 2.0 on), so
# referring to the counts by the string 'SOUS TYPE DECLARATION' on the Series
# itself only works on older releases.
sous_type_frame = pd.DataFrame({
    'index': df_count_sous_type_20.index,
    'SOUS TYPE DECLARATION': df_count_sous_type_20.to_numpy(),
})

fig = px.bar(sous_type_frame, y = 'index', x = 'SOUS TYPE DECLARATION',
            color='SOUS TYPE DECLARATION',
            width=1200, height=600,
            orientation='h',
            labels = {'index': 'Type', 'SOUS TYPE DECLARATION': 'Total'},)
fig.show()

In [425]:
# Count the declarations for each distinct 'DATE DECLARATION' value and plot
# the counts as a line.
declarations_by_date = df_final.groupby('DATE DECLARATION')
df_date = declarations_by_date['DATE DECLARATION'].count()
px.line(df_date)