In [196]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [197]:
# Load the three raw CSV extracts produced by the data-mining step.
farms, rubrics, stocks = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data-mining/farms.csv",
        "../data-mining/rubrics.csv",
        "../data-mining/stocks.csv",
    )
)

## Add departement column

In [198]:
def departement_of(code_postal: int) -> int:
    """Return the departement number encoded in a French postal code.

    Args:
        code_postal: A 5-digit postal code. It may have been parsed as an
            integer and therefore have lost its leading zero (01000 -> 1000).

    Returns:
        The first two digits of the postal code as an int, or the first three
        digits when the code starts with 97 (DOM-TOM).

    Raises:
        ValueError: If ``code_postal`` cannot be converted to an integer.
    """
    # Restore the canonical 5-digit form first: without the padding, 1000
    # (i.e. "01000") would be sliced to "10" and land in the wrong departement.
    digits = str(int(code_postal)).zfill(5)

    # DOM-TOM departement codes are 3 digits long instead of 2.
    prefix_length = 3 if digits.startswith("97") else 2

    return int(digits[:prefix_length])

# Derive each farm's departement from its postal code, element by element.
farms["departement"] = farms["codePostal"].map(departement_of)

In [199]:
# Join every farm to its stock declarations by matching the farm name against
# the stock's "raison_sociale" (default inner join: unmatched rows are dropped).
farms_stocks = pd.merge(farms, stocks, left_on="name", right_on="raison_sociale")

In [200]:
# Keep only the columns used downstream and the rows whose quantity is
# expressed in "u" or "Animaux-Eq".
kept_columns = ["name", "rubric_id", "quantite", "unite", "departement", "bovins", "porcs", "volailles"]
kept_units = ["u", "Animaux-Eq"]

# .copy() makes the filtered frame independent of the merge result, so adding
# columns to it later does not raise SettingWithCopyWarning.
farms_stocks = farms_stocks.loc[farms_stocks["unite"].isin(kept_units), kept_columns].copy()
farms_stocks

Unnamed: 0,name,rubric_id,quantite,unite,departement,bovins,porcs,volailles
0,GAEC DES BOURRYS,2102,1069.0,Animaux-Eq,89,False,True,False
5,GAEC FERME H ET L,2101,210.0,u,50,True,False,False
6,BINOUDIERE (GAEC DE LA),2102,498.0,Animaux-Eq,50,True,True,False
8,BINOUDIERE (GAEC DE LA),2101,60.0,u,50,True,True,False
9,BINOUDIERE (GAEC DE LA),2101,80.0,u,50,True,True,False
...,...,...,...,...,...,...,...,...
23680,DE LA CLINERIE (GAEC) LEFEVRE Christian,2102,1059.0,Animaux-Eq,45,False,True,True
23682,DE LA CLINERIE (GAEC) LEFEVRE Christian,2111,25500.0,Animaux-Eq,45,False,True,True
23684,DES JOURNETS (SCEA),2102,1857.0,Animaux-Eq,41,False,True,False
23685,EARL DES MUSSETS,2111,40000.0,u,41,False,False,True


In [201]:
def animal_type(rubric_id: str) -> str:
    """Translate a rubric identifier into the livestock category it covers.

    "2101" maps to "BOVINS", "2102" and "3660" map to "PORCS", and "2111"
    maps to "VOLAILLES". Any other value yields None.
    """
    category_by_rubric = {
        "2101": "BOVINS",
        "2102": "PORCS",
        "3660": "PORCS",
        "2111": "VOLAILLES",
    }
    return category_by_rubric.get(rubric_id)

# Label every stock row with its livestock category (None for other rubrics).
farms_stocks["type"] = farms_stocks["rubric_id"].map(animal_type)

## Convert quantities expressed in `u` to `Animaux-Eq`

In [202]:
def u_to_animaux_eq(row):
    """Express a stock row's quantity in "Animaux-Eq".

    Rows already in "Animaux-Eq" are returned untouched. Otherwise the
    quantity is scaled by a per-rubric factor (1.0 for "2101", 0.14 for
    "2102"/"3660", 0.01 for "2111") and the unit is relabelled. Rows with any
    other rubric are returned unchanged, unit included.

    The row is modified in place and also returned.
    """
    if row["unite"] == "Animaux-Eq":  # nothing to convert
        return row

    factor_by_rubric = {
        "2101": 1.0,   # bovins
        "2102": 0.14,  # porcs
        "3660": 0.14,  # porcs
        "2111": 0.01,  # volailles
    }
    factor = factor_by_rubric.get(row["rubric_id"])

    if factor is not None:
        row["quantite"] = row["quantite"] * factor
        row["unite"] = "Animaux-Eq"

    return row

# Row-wise pass (axis=1): each row is handed to u_to_animaux_eq as a Series and
# the returned rows are reassembled into the new farms_stocks frame.
farms_stocks = farms_stocks.apply(u_to_animaux_eq, axis=1)

In [203]:
farms_stocks

Unnamed: 0,name,rubric_id,quantite,unite,departement,bovins,porcs,volailles,type
0,GAEC DES BOURRYS,2102,1069.0,Animaux-Eq,89,False,True,False,PORCS
5,GAEC FERME H ET L,2101,210.0,Animaux-Eq,50,True,False,False,BOVINS
6,BINOUDIERE (GAEC DE LA),2102,498.0,Animaux-Eq,50,True,True,False,PORCS
8,BINOUDIERE (GAEC DE LA),2101,60.0,Animaux-Eq,50,True,True,False,BOVINS
9,BINOUDIERE (GAEC DE LA),2101,80.0,Animaux-Eq,50,True,True,False,BOVINS
...,...,...,...,...,...,...,...,...,...
23680,DE LA CLINERIE (GAEC) LEFEVRE Christian,2102,1059.0,Animaux-Eq,45,False,True,True,PORCS
23682,DE LA CLINERIE (GAEC) LEFEVRE Christian,2111,25500.0,Animaux-Eq,45,False,True,True,VOLAILLES
23684,DES JOURNETS (SCEA),2102,1857.0,Animaux-Eq,41,False,True,False,PORCS
23685,EARL DES MUSSETS,2111,400.0,Animaux-Eq,41,False,False,True,VOLAILLES


In [204]:
# Collapse multiple declarations of the same farm/rubric into one row holding
# the mean quantity. Note: groupby drops rows whose key is missing by default,
# so rows without a "type" do not survive this step.
farms_stocks = (
    farms_stocks
    .groupby(["name", "rubric_id", "unite", "departement", "type"])["quantite"]
    .mean()
    .reset_index(name="quantite_mean")
)

In [205]:
farms_stocks

Unnamed: 0,name,rubric_id,unite,departement,type,quantite_mean
0,2 L'oiselière (GAEC),2101,Animaux-Eq,50,BOVINS,130.000000
1,33 RUE DU BOCAGE (GAEC),2102,Animaux-Eq,50,PORCS,747.000000
2,4 CANTONS DUBRON (GAEC DES ),2101,Animaux-Eq,62,BOVINS,215.000000
3,4 CANTONS DUBRON (GAEC DES ),2102,Animaux-Eq,62,PORCS,384.000000
4,4 CANTONS DUBRON (GAEC DES ),2111,Animaux-Eq,62,VOLAILLES,22500.000000
...,...,...,...,...,...,...
10819,earl la roche marteau,2101,Animaux-Eq,36,BOVINS,180.000000
10820,gene +,2102,Animaux-Eq,62,PORCS,1478.200000
10821,quatres sites (GAEC des),2101,Animaux-Eq,19,BOVINS,107.666667
10822,quatres sites (GAEC des),2102,Animaux-Eq,19,PORCS,830.000000


In [206]:
# Rank rows inside each departement by mean quantity, largest first
# (rank 1 = biggest; ties share the average of their positions).
quantite_by_departement = farms_stocks.groupby("departement")["quantite_mean"]
farms_stocks["departement_unite_rank"] = quantite_by_departement.rank(ascending=False)

In [207]:
# Rank rows nationwide within each unit by mean quantity, largest first
# (ties share the average of their positions, hence fractional ranks).
quantite_by_unite = farms_stocks.groupby("unite")["quantite_mean"]
farms_stocks["france_unite_rank"] = quantite_by_unite.rank(ascending=False)

In [208]:
# Order rows from the best national rank to the worst.
farms_stocks = farms_stocks.sort_values(by="france_unite_rank")

In [209]:
farms_stocks

Unnamed: 0,name,rubric_id,unite,departement,type,quantite_mean,departement_unite_rank,france_unite_rank
8148,MARANGER NADINE,2111,Animaux-Eq,36,VOLAILLES,216000.0,1.0,1.0
5398,GAEC DE TREGUEE,2111,Animaux-Eq,29,VOLAILLES,92138.0,1.0,2.0
1078,EARL AVI-ROULAIS,2111,Animaux-Eq,44,VOLAILLES,81600.0,1.0,3.0
9801,SCEA DOMAINE DE ROUILLY M. MITEAULT,2111,Animaux-Eq,86,VOLAILLES,81101.0,1.0,4.0
1443,EARL DE CASTELBON (ex PUJOS J.P & S),2111,Animaux-Eq,32,VOLAILLES,77108.0,1.0,5.0
...,...,...,...,...,...,...,...,...
8959,SAEM ABATTOIR DU PAYS DE CHARLIEU,2101,Animaux-Eq,42,BOVINS,0.0,75.0,10821.5
1585,EARL DE KEROUER,2101,Animaux-Eq,29,BOVINS,0.0,1225.0,10821.5
8256,MICHEL ERIC,2102,Animaux-Eq,22,PORCS,0.0,1694.0,10821.5
537,CAZIN NADEGE,2101,Animaux-Eq,80,BOVINS,0.0,107.0,10821.5


In [210]:
# Keep the top-ranked row(s) of every departement.
# The rank column uses the default "average" tie method, so farms tied for
# first place get a fractional rank (e.g. 1.5) and an `== 1` test would drop
# the whole departement. Comparing against the best rank actually present in
# each departement keeps all tied leaders and is identical when there is no tie.
best_rank_in_departement = farms_stocks.groupby("departement")["departement_unite_rank"].transform("min")
farms_stocks_best = farms_stocks[farms_stocks["departement_unite_rank"] == best_rank_in_departement]

In [211]:
farms_stocks_best

Unnamed: 0,name,rubric_id,unite,departement,type,quantite_mean,departement_unite_rank,france_unite_rank
8148,MARANGER NADINE,2111,Animaux-Eq,36,VOLAILLES,216000.0,1.0,1.0
5398,GAEC DE TREGUEE,2111,Animaux-Eq,29,VOLAILLES,92138.0,1.0,2.0
1078,EARL AVI-ROULAIS,2111,Animaux-Eq,44,VOLAILLES,81600.0,1.0,3.0
9801,SCEA DOMAINE DE ROUILLY M. MITEAULT,2111,Animaux-Eq,86,VOLAILLES,81101.0,1.0,4.0
1443,EARL DE CASTELBON (ex PUJOS J.P & S),2111,Animaux-Eq,32,VOLAILLES,77108.0,1.0,5.0
...,...,...,...,...,...,...,...,...
10483,SEPAB (SARL),2101,Animaux-Eq,34,BOVINS,1100.0,1.0,4375.0
10611,TERRAZZONI Frères (SCEA),2111,Animaux-Eq,20,VOLAILLES,900.0,1.0,5098.0
4152,ETABLISSEMENTS GUY HARANG,2102,Animaux-Eq,78,PORCS,800.0,1.0,5494.0
7992,LES GRILLONS (SARL),2111,Animaux-Eq,84,VOLAILLES,350.0,1.0,8100.5


In [212]:
# Discard leaders whose mean quantity is exactly zero.
has_stock = farms_stocks_best["quantite_mean"] != 0
farms_stocks_best = farms_stocks_best[has_stock]

In [213]:
farms_stocks_best

Unnamed: 0,name,rubric_id,unite,departement,type,quantite_mean,departement_unite_rank,france_unite_rank
8148,MARANGER NADINE,2111,Animaux-Eq,36,VOLAILLES,216000.0,1.0,1.0
5398,GAEC DE TREGUEE,2111,Animaux-Eq,29,VOLAILLES,92138.0,1.0,2.0
1078,EARL AVI-ROULAIS,2111,Animaux-Eq,44,VOLAILLES,81600.0,1.0,3.0
9801,SCEA DOMAINE DE ROUILLY M. MITEAULT,2111,Animaux-Eq,86,VOLAILLES,81101.0,1.0,4.0
1443,EARL DE CASTELBON (ex PUJOS J.P & S),2111,Animaux-Eq,32,VOLAILLES,77108.0,1.0,5.0
...,...,...,...,...,...,...,...,...
10483,SEPAB (SARL),2101,Animaux-Eq,34,BOVINS,1100.0,1.0,4375.0
10611,TERRAZZONI Frères (SCEA),2111,Animaux-Eq,20,VOLAILLES,900.0,1.0,5098.0
4152,ETABLISSEMENTS GUY HARANG,2102,Animaux-Eq,78,PORCS,800.0,1.0,5494.0
7992,LES GRILLONS (SARL),2111,Animaux-Eq,84,VOLAILLES,350.0,1.0,8100.5


## Export to JSON

In [215]:
# Write the per-departement leaders to "4.json" in the current working
# directory; orient="records" serialises the frame as a list of row objects.
farms_stocks_best.to_json("4.json", orient="records")