# Projet 08 : Communiquer des résultats
# Dashboard Tableau : Préparation des données pour le dashboard

---
**Importation des bibliothèques**

In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
# Optional: uncomment the two lines below to silence library warnings.
# They are left disabled here, so warnings stay visible in the cell outputs.
#import warnings
#warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import seaborn as sns
import scipy.stats as st
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn import model_selection

import modules_perso.sf_graphiques as sfg
import modules_perso.sf_classification_acp as sfca
import modules_perso.sf_modeles_anova as sfma

<a id='1_0'></a>

---
## <font color=blue>Préparation des données pour le dashboard</font>

**Modification des clusters pour qu'ils aient tous la même structure**

Clusters numérotés de 1 à 9 pour la matrice 3x3 (lus ligne par ligne), et cluster 10 pour le super-cluster :  
. . . 10  
1 . 2 . 3  
4 . 5 . 6  
7 . 8 . 9

**Chargement des données de base**

In [4]:
# Long-format input: the pivot below relies on the columns contract_id,
# dateref, trading_indicator and somme_signaux; dateref is parsed as a date.
agreg_contrat_indicateur = pd.read_csv(
    "agreg_contrat_indicateur.csv", index_col=0, parse_dates=["dateref"]
)

# Wide table: one row per (contract_id, dateref), one column per
# trading_indicator, each holding the summed signals.
# aggfunc is the string "sum" rather than the np.sum callable: passing the
# callable is deprecated in recent pandas (FutureWarning) while the string
# keeps the same result on every version.
somme_par_indicateur = pd.pivot_table(
    agreg_contrat_indicateur,
    values="somme_signaux",
    index=["contract_id", "dateref"],
    columns="trading_indicator",
    aggfunc="sum",
)
somme_par_indicateur.head()

  mask |= (ar1 == a)


Unnamed: 0_level_0,trading_indicator,BRO,FOT,MOM
contract_id,dateref,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1989-11-01,0,0,0
1,1989-11-02,0,0,0
1,1989-11-03,0,0,0
1,1989-11-06,0,0,0
1,1989-11-07,0,0,0


In [5]:
# 21-day horizon dataset; the first CSV column becomes the index and dateref
# is parsed as a date.
data21 = pd.read_csv("data21jours.csv", index_col=[0], parse_dates=["dateref"])

# Mean standardized variation and mean signal sum for each raw cluster label.
# NOTE(review): presumably these means are what the renumbering in the next
# cell is based on - confirm.
data21.groupby("cluster")[["variation_21_standard", "somme_signaux"]].mean()

Unnamed: 0_level_0,variation_21_standard,somme_signaux
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.620407,337.330407
1,-0.025331,352.127426
2,0.049873,31.790207
3,-0.420266,-313.136761
4,0.316237,-303.222926
5,-0.332846,350.268246
6,-0.311543,19.026631
7,-0.045527,-305.174972
8,0.408611,29.357791
9,0.257858,351.520789


In [6]:
# Renumbering table for the 21-day clustering: raw cluster label (index) ->
# label in the shared layout (1-9 for the 3x3 matrix, 10 for the super cluster).
myclusters = pd.DataFrame(
    [[4, 1], [8, 2], [9, 3],
     [7, 4], [2, 5], [1, 6],
     [3, 7], [6, 8], [5, 9],
     [0, 10]],
    columns=["cluster_initial", "cluster21"],
).set_index("cluster_initial")

# assign + map instead of merge: re-running this cell simply overwrites
# cluster21, whereas a second merge would produce cluster21_x / cluster21_y
# and break the attribute access below.
data21 = data21.assign(cluster21=data21["cluster"].map(myclusters["cluster21"]))
assert data21["cluster21"].notna().all(), "some rows have a cluster label missing from myclusters"

# Start the long-format variations table with the 1-month horizon.
# NOTE(review): the saved output of this cell shows a category_name column
# that this selection does not include - confirm which one is intended.
data_variations = data21[["contract_id", "dateref"]].copy()
data_variations["horizon_mois"] = 1
data_variations["variation_prix"] = data21["variation_21_standard"]
data_variations["cluster"] = data21["cluster21"]
data_variations.head()

Unnamed: 0,category_name,contract_id,dateref,horizon_mois,variation_prix,cluster
0,Currencies,1,1989-11-01,1,0.082215,5
1,Currencies,1,1989-11-02,1,0.239948,2
2,Currencies,1,1989-11-03,1,0.198553,5
3,Currencies,1,1989-11-06,1,0.0978,5
4,Currencies,1,1989-11-07,1,0.091391,5


In [7]:
# Attach the per-indicator signal sums (columns of somme_par_indicateur) to
# each (contract_id, dateref) row of the 21-day dataset; rows without a match
# in the pivot are kept (left join).
colonnes_signaux = ["category_name", "contract_id", "dateref", "somme_signaux"]
cles_jointure = ["contract_id", "dateref"]
data_somme_signaux = pd.merge(
    data21[colonnes_signaux],
    somme_par_indicateur,
    how="left",
    left_on=cles_jointure,
    right_index=True,
)
data_somme_signaux.head()

Unnamed: 0,category_name,contract_id,dateref,somme_signaux,BRO,FOT,MOM
0,Currencies,1,1989-11-01,0,0,0,0
1,Currencies,1,1989-11-02,0,0,0,0
2,Currencies,1,1989-11-03,0,0,0,0
3,Currencies,1,1989-11-06,0,0,0,0
4,Currencies,1,1989-11-07,0,0,0,0


In [8]:
# 42-day horizon dataset; the first CSV column becomes the index and dateref
# is parsed as a date.
data42 = pd.read_csv("data42jours.csv", index_col=[0], parse_dates=["dateref"])

# Mean standardized variation and mean signal sum for each raw cluster label.
data42.groupby("cluster")[["variation_42_standard", "somme_signaux"]].mean()

Unnamed: 0_level_0,variation_42_standard,somme_signaux
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-0.062665,24.597352
1,0.069174,353.129468
2,0.030296,-307.330278
3,0.938778,241.107483
4,-0.35796,351.391101
5,-0.566036,32.683596
6,0.408568,38.19388
7,0.565371,-284.540715
8,0.469822,362.361508
9,-0.475849,-315.474263


In [9]:
# Renumbering table for the 42-day clustering: raw cluster label (index) ->
# label in the shared layout (1-9 for the 3x3 matrix, 10 for the super cluster).
myclusters = pd.DataFrame(
    [[7, 1], [6, 2], [8, 3],
     [2, 4], [0, 5], [1, 6],
     [9, 7], [5, 8], [4, 9],
     [3, 10]],
    columns=["cluster_initial", "cluster42"],
).set_index("cluster_initial")

# assign + map instead of merge: re-running this cell simply overwrites
# cluster42, whereas a second merge would produce cluster42_x / cluster42_y
# and break the attribute access below.
data42 = data42.assign(cluster42=data42["cluster"].map(myclusters["cluster42"]))
assert data42["cluster42"].notna().all(), "some rows have a cluster label missing from myclusters"

# Rows for the 2-month horizon, in the same layout as data_variations.
inter_variations = data42[["contract_id", "dateref"]].copy()
inter_variations["horizon_mois"] = 2
inter_variations["variation_prix"] = data42["variation_42_standard"]
inter_variations["cluster"] = data42["cluster42"]

# Discard horizon-2 rows left by a previous run of this cell before appending,
# so that re-executing it does not duplicate them (no effect on a first run).
data_variations = pd.concat(
    [data_variations[data_variations["horizon_mois"] != 2], inter_variations],
    ignore_index=True,
)
data_variations.tail()

In [10]:
# 64-day horizon dataset; the first CSV column becomes the index and dateref
# is parsed as a date.
data64 = pd.read_csv("data64jours.csv", index_col=[0], parse_dates=["dateref"])

# Mean standardized variation and mean signal sum for each raw cluster label.
data64.groupby("cluster")[["variation_64_standard", "somme_signaux"]].mean()

Unnamed: 0_level_0,variation_64_standard,somme_signaux
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.575046,361.073299
1,0.081166,-304.298003
2,-0.424973,350.212749
3,0.523248,34.9734
4,-0.06093,27.912297
5,0.091973,355.078456
6,1.167483,241.773857
7,-0.534041,-318.238898
8,-0.666221,27.579797
9,0.756264,-281.138131


In [11]:
# Renumbering table for the 64-day clustering: raw cluster label (index) ->
# label in the shared layout (1-9 for the 3x3 matrix, 10 for the super cluster).
myclusters = pd.DataFrame(
    [[9, 1], [3, 2], [0, 3],
     [1, 4], [4, 5], [5, 6],
     [7, 7], [8, 8], [2, 9],
     [6, 10]],
    columns=["cluster_initial", "cluster64"],
).set_index("cluster_initial")

# assign + map instead of merge: re-running this cell simply overwrites
# cluster64, whereas a second merge would produce cluster64_x / cluster64_y
# and break the attribute access below.
data64 = data64.assign(cluster64=data64["cluster"].map(myclusters["cluster64"]))
assert data64["cluster64"].notna().all(), "some rows have a cluster label missing from myclusters"

# Rows for the 3-month horizon, in the same layout as data_variations.
# NOTE(review): the saved output of this cell shows a category_name column
# that this selection does not include - confirm which one is intended.
inter_variations = data64[["contract_id", "dateref"]].copy()
inter_variations["horizon_mois"] = 3
inter_variations["variation_prix"] = data64["variation_64_standard"]
inter_variations["cluster"] = data64["cluster64"]

# Discard horizon-3 rows left by a previous run of this cell before appending,
# so that re-executing it does not duplicate them (no effect on a first run).
data_variations = pd.concat(
    [data_variations[data_variations["horizon_mois"] != 3], inter_variations],
    ignore_index=True,
)
data_variations.tail()

Unnamed: 0,category_name,contract_id,dateref,horizon_mois,variation_prix,cluster
1075135,STIR,40,2019-08-09,3,-0.379727,9
1075136,STIR,40,2019-08-12,3,-0.414566,9
1075137,STIR,40,2019-08-13,3,-0.340556,9
1075138,STIR,40,2019-08-14,3,-0.277606,9
1075139,STIR,40,2019-08-15,3,-0.286355,9


**Sauvegarde des données complètes pour le dashboard**

In [12]:
# Write both dashboard tables to CSV in the working directory (index included,
# as with the default to_csv behaviour).
for table, fichier in (
    (data_somme_signaux, "projet08_dashboard_sommesignaux.csv"),
    (data_variations, "projet08_dashboard_variations.csv"),
):
    table.to_csv(fichier)