# This is Constantin research notebook for the project smart city

In [20]:
from zipfile import ZipFile, Path
import pandas as pd
import plotly.express as px


In [3]:
def get_data(path: str) -> pd.DataFrame:
    '''
    Get data from zip file
    '''
    with ZipFile(path) as myzip:
        data = myzip.open(path.split('.zip')[0])

    df = pd.read_csv(data, sep =';')
    return df

In [4]:
df_2021_2022 = get_data("DMR_2021_2022.csv.zip")
df_2020 = get_data("DMR_2020.csv.zip")
df_2019 = get_data("DMR_2019.csv.zip")

In [5]:
df_2021_2022[['X', 'Y']] = df_2021_2022['geo_point_2d'].str.split(',', expand=True)
df_2021_2022['X'] = df_2021_2022['X'].astype(float)
df_2021_2022['Y'] = df_2021_2022['Y'].astype(float)

In [6]:
df_2021_2022 = df_2021_2022.drop(['ID DECLARATION', 'VILLE', 'ANNEE DECLARATION', 'MOIS DECLARATION',
         'OUTIL SOURCE', 'ID_DMR', 'geo_shape', 'geo_point_2d', 'CONSEIL DE QUARTIER'], axis = 1)

In [7]:
df_2019_2020 = pd.concat([df_2019, df_2020])
df_2019_2020 = df_2019_2020.drop(['OBJECTID', 'ID_DMR', 'VILLE' ,'ANNEEDECL' ,'MOISDECL' ,'NUMERO','PREFIXE', 'CONSEILQUARTIER',  ], axis = 1)


In [8]:
df_2019_2020.rename(columns = {'TYPE':'TYPE DECLARATION', 'SOUSTYPE':'SOUS TYPE DECLARATION', 'CODE_POSTAL':'CODE POSTAL', 'DATEDECL':'DATE DECLARATION'}, inplace = True)
df_final = pd.concat([df_2021_2022, df_2019_2020])


In [9]:
df_final

Unnamed: 0,TYPE DECLARATION,SOUS TYPE DECLARATION,ADRESSE,CODE POSTAL,ARRONDISSEMENT,DATE DECLARATION,INTERVENANT,X,Y
0,Mobiliers urbains,Collecte des déchets:Trilib' dégradé,"18 Rue de Mazagran, 75010 PARIS",75010,10,2021-01-08,TRILIB',48.870773,2.352241
1,Objets abandonnés,Autres objets encombrants abandonnés,"58 Rue de l'Aqueduc, 75010 PARIS",75010,10,2021-01-08,Ramen en tant que prestataire de DansMaRue,48.882847,2.365264
2,Objets abandonnés,Autres objets encombrants abandonnés,"106 Rue des Amandiers, 75020 PARIS",75020,20,2021-01-08,Ramen en tant que prestataire de DansMaRue,48.86778,2.388731
3,"Graffitis, tags, affiches et autocollants","Graffitis sur mur, façade sur rue, pont","14 Rue Carrière-Mainguet, 75011 PARIS",75011,11,2021-01-08,graffitis,48.8566,2.387608
4,Objets abandonnés,Cartons,"1 Square Alfred Capus, 75016 PARIS",75016,16,2021-01-08,DPE-STPP-DT,48.850937,2.259631
...,...,...,...,...,...,...,...,...,...
586265,Propreté,Déchets et/ou salissures divers,"8 Cité Paradis, 75010 PARIS",75010,10,2020-12-31 0:00:00,DPE,2350695102,4887483
586266,Objets abandonnés,Autres objets encombrants abandonnés,"5 boulevard de Belleville, 75011 PARIS",75011,11,2020-12-31 0:00:00,Ramen en tant que prestataire de DansMaRue,23821876,48867542
586267,Éclairage / Électricité,Éclairage public éteint la nuit,"14 rue Leo Frankel, 75013 PARIS",75013,13,2020-12-31 0:00:00,EVESA_ToutParis,2377989302,4882715
586268,"Graffitis, tags, affiches et autocollants","Graffitis sur mur, façade sur rue, pont","2 rue Saint-Claude, 75003 PARIS",75003,3,2020-12-31 0:00:00,graffitis,2367085696,4886015


In [115]:
arrondissement_count = df_final["ARRONDISSEMENT"].value_counts()

fig = px.bar(arrondissement_count, x=arrondissement_count.index, y='ARRONDISSEMENT',
             hover_data=[arrondissement_count.index, 'ARRONDISSEMENT'], color='ARRONDISSEMENT',
             labels={'index': "District number",'ARRONDISSEMENT':'District'}, height=400)
fig.update_xaxes(type='category')
fig.show()


In [215]:
import plotly.express as px


fig = px.pie(arrondissement_count, values='ARRONDISSEMENT',
            names=arrondissement_count.index, 
            title='Pie chart of anomalies by district',
            color='ARRONDISSEMENT',
            color_discrete_sequence=px.colors.cyclical.IceFire,
            labels={'index': "Number", 'ARRONDISSEMENT': 'Anomalies' }, height=400)
fig.update_layout(width=1000,
    height=700,
    legend_title_text='District'
)

fig.show()