In [52]:
#Importation des bibliothèques
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#Implémentation du modèle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import LabelEncoder
#Normalissation
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVR

In [4]:
#Chargement du dataset
df = pd.read_excel('Ecriturecompte.xlsx')

In [5]:
df.head(5)

Unnamed: 0,PFE,Écriture comptable article,32,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,,,,,,
1,N° séquence,N° article,Date comptabilisation,Type écriture,Quantité,Type document
2,1,PI106,2021-04-01,Vente,-18.3,Expédition vente
3,2,70298,2021-04-01,Vente,-324.29,Expédition vente
4,3,RO047,2021-04-01,Vente,-204,Expédition vente


In [6]:
df_copie = df.copy()

In [7]:
df = df.iloc[2:]
# Réinitialiser l'index
df = df.reset_index(drop=True)
df.head()
#Renommer les colonnes
df=df.rename(columns={f'{df.columns.values[1]}': 'PRODUIT'})
df=df.rename(columns={f'{df.columns.values[2]}': 'PERIODE'})
df=df.rename(columns={f'{df.columns.values[4]}': 'QUANTITE'})
df['QUANTITE']=df['QUANTITE'].astype(float)
df.head(5)

Unnamed: 0,PFE,PRODUIT,PERIODE,Unnamed: 3,QUANTITE,Unnamed: 5
0,1,PI106,2021-04-01,Vente,-18.3,Expédition vente
1,2,70298,2021-04-01,Vente,-324.29,Expédition vente
2,3,RO047,2021-04-01,Vente,-204.0,Expédition vente
3,4,RO060,2021-04-01,Vente,-150.0,Expédition vente
4,5,70223,2021-04-01,Vente,-109.89,Expédition vente


In [8]:
#Sélection des colones
df=df[['PRODUIT','PERIODE','QUANTITE']]
df.head(5)

Unnamed: 0,PRODUIT,PERIODE,QUANTITE
0,PI106,2021-04-01,-18.3
1,70298,2021-04-01,-324.29
2,RO047,2021-04-01,-204.0
3,RO060,2021-04-01,-150.0
4,70223,2021-04-01,-109.89


In [9]:
#Rendre les quantité positives
df['QUANTITE'] = np.abs(df['QUANTITE'])
df.head(5)

Unnamed: 0,PRODUIT,PERIODE,QUANTITE
0,PI106,2021-04-01,18.3
1,70298,2021-04-01,324.29
2,RO047,2021-04-01,204.0
3,RO060,2021-04-01,150.0
4,70223,2021-04-01,109.89


In [10]:
df.info()
print(df.isnull().sum())
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180070 entries, 0 to 180069
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   PRODUIT   180070 non-null  object 
 1   PERIODE   180070 non-null  object 
 2   QUANTITE  180070 non-null  float64
dtypes: float64(1), object(2)
memory usage: 4.1+ MB
PRODUIT     0
PERIODE     0
QUANTITE    0
dtype: int64


Unnamed: 0,QUANTITE
count,180070.0
mean,298.424044
std,1134.336889
min,0.00488
25%,30.0
50%,80.0
75%,210.2375
max,178000.0


In [11]:
df.head(5)

Unnamed: 0,PRODUIT,PERIODE,QUANTITE
0,PI106,2021-04-01,18.3
1,70298,2021-04-01,324.29
2,RO047,2021-04-01,204.0
3,RO060,2021-04-01,150.0
4,70223,2021-04-01,109.89


In [12]:
df['PERIODE']=pd.to_datetime(df['PERIODE'])
df['PERIODE']=df['PERIODE'].dt.month
df.head(5)

Unnamed: 0,PRODUIT,PERIODE,QUANTITE
0,PI106,4,18.3
1,70298,4,324.29
2,RO047,4,204.0
3,RO060,4,150.0
4,70223,4,109.89


In [13]:
label_encoder = LabelEncoder()
df['PRODUIT_ENCODED'] = label_encoder.fit_transform(df['PRODUIT'])

In [14]:
df.head(5)

Unnamed: 0,PRODUIT,PERIODE,QUANTITE,PRODUIT_ENCODED
0,PI106,4,18.3,747
1,70298,4,324.29,39
2,RO047,4,204.0,772
3,RO060,4,150.0,780
4,70223,4,109.89,7


In [15]:
df = df[['PRODUIT_ENCODED','PERIODE','QUANTITE']]
df.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE,QUANTITE
0,747,4,18.3
1,39,4,324.29
2,772,4,204.0
3,780,4,150.0
4,7,4,109.89


In [16]:
scaler = MinMaxScaler()
df_normalized = scaler.fit_transform(df)
df_normalized = pd.DataFrame(df_normalized, columns=df.columns)
df_normalized.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE,QUANTITE
0,0.910976,0.272727,0.000103
1,0.047561,0.272727,0.001822
2,0.941463,0.272727,0.001146
3,0.95122,0.272727,0.000843
4,0.008537,0.272727,0.000617


In [33]:
# Standardisation des données
scaler2 = StandardScaler()
df_standar = scaler2.fit_transform(df)
df_standar = pd.DataFrame(df_standar, columns=df.columns)
df_standar.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE,QUANTITE
0,1.382079,-0.852158,-0.24695
1,-1.288916,-0.852158,0.022803
2,1.476394,-0.852158,-0.083242
3,1.506575,-0.852158,-0.130847
4,-1.409639,-0.852158,-0.166207


In [17]:
df_normalized.shape

(180070, 3)

In [18]:
a = scaler.inverse_transform(df_normalized)
a = pd.DataFrame(a, columns=df_normalized.columns)
a.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE,QUANTITE
0,747.0,4.0,18.3
1,39.0,4.0,324.29
2,772.0,4.0,204.0
3,780.0,4.0,150.0
4,7.0,4.0,109.89


In [19]:
a.shape

(180070, 3)

In [20]:
#Séparation des données de train et test
X=df_normalized.drop('QUANTITE',axis=1)
y=df_normalized['QUANTITE']
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)


In [34]:
#Séparation des données de train et test
X_=df_standar.drop('QUANTITE',axis=1)
y_=df_standar['QUANTITE']
x_train_,x_test_,y_train_,y_test_=train_test_split(X_,y_,test_size=0.2,random_state=42)

In [35]:
#print(x_train)
y_test
y_test_

10116    -0.130847
104876   -0.122780
35       -0.250477
71528    -0.174926
120999   -0.254267
            ...   
71774    -0.155857
782      -0.219004
149334    0.089547
168937   -0.254267
89820    -0.222002
Name: QUANTITE, Length: 36014, dtype: float64

In [22]:

#Implémentation du modlèle
model=LinearRegression()
model.fit(x_train,y_train)
predictions=model.predict(x_test)

In [36]:
#Implémentation du modlèle --Standardisation
model_stand=LinearRegression()
model_stand.fit(x_train,y_train)
predictions_stand=model_stand.predict(x_test)

In [48]:
#Evaluation du modèle
mse=mean_squared_error(y_test,predictions)
mae=mean_absolute_error(y_test,predictions)
r2=r2_score(y_test,predictions)
mape=mean_absolute_percentage_error(y_test_,predictions_stand)
rmse=root_mean_squared_error(y_test_,predictions_stand)


In [44]:
#Evaluation du modèle
mse_=mean_squared_error(y_test_,predictions_stand)
mae_=mean_absolute_error(y_test_,predictions_stand)
r2_=r2_score(y_test_,predictions_stand)


In [40]:
print(f'mse:{mse_}')
print(f'mae:{mae_}')
print(f'r2:{r2_}')

mse:0.8806962616031331
mae:0.3093050770328284
r2:-2.1494946289557504e-06


In [49]:
print(f'mse:{mse}')
print(f'mae:{mae}')
print(f'rmse:{rmse}')
print(f'mape:{mape}')
print(f'r2:{r2}')

mse:3.576550002312885e-05
mae:0.0019645987044048423
rmse:0.9384541872692205
mape:1.0719716674267927
r2:6.072122261668866e-06


In [25]:
x_test.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE
10116,0.107317,0.363636
104876,0.89878,0.909091
35,0.067073,0.272727
71528,0.065854,0.454545
120999,0.536585,0.0


In [26]:
#Resultat
resultats = pd.DataFrame({
    'PRODUIT_ENCODED': x_test['PRODUIT_ENCODED'],
    'PERIODE': x_test['PERIODE'],
    'QUANTITE': y_test,
    'predictions': predictions
})
resultats.head(5)

Unnamed: 0,PRODUIT_ENCODED,PERIODE,QUANTITE,predictions
10116,0.107317,0.363636,0.000843,0.00167
104876,0.89878,0.909091,0.000894,0.001682
35,0.067073,0.272727,8e-05,0.00167
71528,0.065854,0.454545,0.000562,0.001668
120999,0.536585,0.0,5.6e-05,0.001682


## Evaluation des performances du modèle

In [53]:
#Modèle SVr
X = df[['PRODUIT_ENCODED', 'PERIODE']]  # Variables explicatives
y = df['QUANTITE']  # Variable cible

# 1. Normalisation des données d'entrée (SVR est sensible à l'échelle des données)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2. Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
svr = SVR(kernel='rbf')  # On utilise le noyau RBF pour la régression non linéaire
svr.fit(X_train, y_train)

# 4. Prédictions avec le modèle
y_pred = svr.predict(X_test)

# 5. Évaluation du modèle
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")

# Optionnel: Affichage des prédictions comparées aux valeurs réelles
comparison = pd.DataFrame({'Vraie Quantité': y_test, 'Quantité Prédite': y_pred})
print(comparison.head())

Mean Squared Error (MSE): 1178621.6409817617
        Vraie Quantité  Quantité Prédite
10116           150.00         96.622697
104876          159.15        104.994346
35               14.30         99.972629
71528           100.00         96.612154
120999           10.00         58.619945


In [30]:
accuracy_score = accuracy_score(y_test, predictions, normalize=True)

ValueError: continuous is not supported

In [28]:
# precision_score = precision_score(y_test, predictions)
# recall_score = recall_score(y_test, predictions)
# f1_score = f1_score(y_test, predictions)
# confusion_matrix = confusion_matrix(y_test, predictions)