# Data Analysis

In [None]:
# Import libraries (tabulate is required by DataFrame.to_markdown)
import numpy as np
import pandas as pd
import tabulate

In [None]:
# Load the site database (semicolon-separated CSV) and preview its first rows
df = pd.read_csv(
    "../database/data.csv",
    delimiter=";",
)
df.head()

## Key figures about the database

In [None]:
# Pull the columns of interest out of the frame as plain Python lists
# (the lists are reused further down through ``list.count``).
sites, regions, departments, cities, providers = (
    list(df[column])
    for column in ("num_site", "nom_reg", "nom_dep", "nom_com", "nom_op")
)

# Number of rows in the database (one entry per listed site).
nb_sit = len(sites)

# Number of distinct regions, departments, cities and providers.
nb_reg, nb_dep, nb_cit, nb_pro = (
    np.unique(values).size
    for values in (regions, departments, cities, providers)
)

# Optional console checks, disabled by default. Uncomment a snippet to print
# the corresponding figure while exploring the data.

# print(nb_sit) # number of sites

# print(nb_sit/nb_reg) # mean number of sites per region

# print(nb_sit/nb_dep) # mean number of sites per department

# print(nb_sit/nb_cit) # mean number of sites per city

# for reg in np.unique(regions): # number of sites per region
#     print(reg + " : " + str(regions.count(reg)))

# for dep in np.unique(departments): # number of sites per department
#     print(dep + " : " + str(departments.count(dep)))

# for pro in np.unique(providers): # number of sites per provider
#     print(pro + " : " + str(providers.count(pro)))

# NOTE(review): the snippet below increments nb_cit, which already holds the
# number of distinct cities and is reused for the per-city mean in the report.
# Use a separate counter before uncommenting it.
# for cit in np.unique(cities): # number of sites per city (for cities with a number >= 20)
#     count = cities.count(cit)
#     if (count >= 20):
#         nb_cit += 1
#         print(cit + " : " + str(count))

# Providers that always get a column, in the order used by the report tables.
MAIN_PROVIDERS = ["Free Mobile", "SFR", "Orange", "Bouygues Telecom"]


def count_sites_per_provider(data, group_col, provider_col="nom_op"):
    """Count sites per provider for each distinct value of ``group_col``.

    Parameters
    ----------
    data : pd.DataFrame
        Site table; must contain ``group_col`` and ``provider_col``.
    group_col : str
        Column to group by (e.g. ``"nom_reg"`` or ``"nom_dep"``).
    provider_col : str, default "nom_op"
        Column holding the provider name of each row.

    Returns
    -------
    pd.DataFrame
        One row per distinct ``group_col`` value (sorted), with the columns
        ``group_col``, one integer column per provider and ``"Total"``.
    """
    # A single cross-tabulation replaces the nested loops that re-filtered
    # the whole frame for every (group, provider) pair.
    counts = pd.crosstab(data[group_col], data[provider_col])

    # Keep the four main providers first, in a fixed order; a provider that is
    # absent from the data shows 0. Any other provider found in the data is
    # appended after them.
    other_providers = [name for name in counts.columns if name not in MAIN_PROVIDERS]
    counts = counts.reindex(columns=MAIN_PROVIDERS + other_providers, fill_value=0)

    # Total = number of rows of the group, independently of the provider columns.
    counts["Total"] = data[group_col].value_counts().reindex(counts.index, fill_value=0)

    # Drop the "nom_op" label on the column axis so the table renders with
    # plain column headers, then expose the group as a regular column.
    counts.columns.name = None
    return counts.reset_index()


# Number of sites per region per provider
df_proPerReg = count_sites_per_provider(df, "nom_reg")

# Number of sites per department per provider
df_proPerDep = count_sites_per_provider(df, "nom_dep")

In [None]:
# Display the per-region, per-provider site counts built in the previous cell
df_proPerReg

In [None]:
# Total of each network-generation column.
# Series.sum() is vectorised and skips missing values, whereas the builtin
# sum() iterates row by row and yields nan as soon as one value is missing.
# NOTE(review): assumes the site_2g..site_5g columns are 0/1 (or boolean)
# flags, so each total is a number of sites — confirm against the dataset.
nb_sit_2g = int(df["site_2g"].sum())
nb_sit_3g = int(df["site_3g"].sum())
nb_sit_4g = int(df["site_4g"].sum())
nb_sit_5g = int(df["site_5g"].sum())

# Print the four totals, one per line, from 2g to 5g.
for nb_sites in (nb_sit_2g, nb_sit_3g, nb_sit_4g, nb_sit_5g):
    print(nb_sites)

In [None]:
# Write the key figures to the Markdown report "data_numbers.md".
# The file is opened in a ``with`` block so it is closed even if one of the
# writes (or to_markdown) raises, and UTF-8 is requested explicitly because
# the report text contains accented characters.
with open("data_numbers.md", "w", encoding="utf-8") as results:
    results.write("# Analyse des données\n")

    results.write("## Description des champs :\n cf. https://data.arcep.fr/mobile/sites/\n")

    results.write("## Chiffres sur les stations de base :\n")

    # General figures: total number of sites and rounded means per area.
    results.write("### Chiffres généraux :\n")
    results.write("|||\n|---|---:|\n")
    results.write(f"| Stations de base | {nb_sit} | \n")
    results.write(f"| Stations de base/région (moyenne) | {round(nb_sit / nb_reg)} |\n")
    results.write(f"| Stations de base/département (moyenne) | {round(nb_sit / nb_dep)} |\n")
    results.write(f"| Stations de base/ville (moyenne) | {round(nb_sit / nb_cit)} |\n")

    # One table row per provider, in sorted provider order.
    results.write("### Stations par opérateurs :\n")
    results.write("|Opérateur|Nb de stations|\n|---|---:|\n")
    for pro in np.unique(providers):
        results.write(f"| {pro} | {providers.count(pro)} | \n")

    # DataFrame.to_markdown needs the tabulate package to be installed.
    results.write("### Stations par régions, par opérateurs :\n")
    results.write(df_proPerReg.to_markdown(index=False) + "\n")

    results.write("### Stations par départements, par opérateurs :\n")
    results.write(df_proPerDep.to_markdown(index=False) + "\n")

    # Totals per network generation.
    results.write("### Type de réseau :\n")
    results.write("|Type de réseau|Nb de stations|\n|---|---:|\n")
    results.write(f"| 2g | {nb_sit_2g} | \n")
    results.write(f"| 3g | {nb_sit_3g} | \n")
    results.write(f"| 4g | {nb_sit_4g} | \n")
    results.write(f"| 5g | {nb_sit_5g} | \n")

## Contents of the database

In [None]:
# Uncomment to list the dtype of every column of the database
# print(df.dtypes)

In [None]:
# Uncomment to print, for every column, its dtype and its number of non-null values.
# NOTE: Series.count() already ignores missing values; `.notnull().count()` would
# return the total number of rows, because the boolean mask has no missing values.
# for col_name in df.columns:
#     print(col_name + " : " + str(df[col_name].dtype) + " (" + str(df[col_name].count()) + ")")