# **STATISTIQUE ET PROBABILITES**

Name: **STATISTIQUE DESCRIPTIVE - DESCRIPTION D'UNE SERIE STATISTIQUE**  
Date : 2023  
Author: Aurélien Vannieuwenhuyze  


<a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Licence Creative Commons" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under the terms of the <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.
<hr/>

In [None]:
import numpy as np
from prettytable import PrettyTable


## **1: DISTRIBUTION STATISTIQUE DISCRETE OU CONTINUE**

### 1.1: Effectifs et fréquences

In [None]:
# Small discrete sample series used to illustrate counts and frequencies.
X=[0,0,5,1,2,3,3]

In [None]:
def tableau_effectifs_frequences(X):
  """Print the frequency table of a discrete statistical series.

  For each distinct value of ``X`` (in ascending order) the table shows the
  count ``ni``, the relative frequency ``fi = ni / N`` and the cumulative
  relative frequency; both frequencies are rounded to 2 decimals.

  Args:
    X: sequence (list or NumPy array) of observed values.

  Returns:
    None. The table is written to standard output.
  """
  # A single np.unique call yields the sorted distinct values together with
  # their counts. Converting to an array first avoids depending on how a
  # plain Python list compares with a NumPy scalar.
  xi, effectifs = np.unique(np.asarray(X), return_counts=True)
  ni = effectifs.tolist()

  # Relative frequencies
  N = len(X)
  fi = [n / N for n in ni]

  # Cumulative relative frequencies (running sum of fi)
  ficum = []
  somme = 0
  for f in fi:
    somme += f
    ficum.append(somme)

  tab = PrettyTable()
  tab.field_names = ["xi", "ni", "fi","fi cumulées"]
  for valeur, n, f, cf in zip(xi, ni, fi, ficum):
    tab.add_row([valeur, n, round(f, 2), round(cf, 2)])

  print(tab)




In [None]:
# Print the frequency table of the sample series X.
tableau_effectifs_frequences(X)

+----+----+------+-------------+
| xi | ni |  fi  | fi cumulées |
+----+----+------+-------------+
| 0  | 2  | 0.29 |     0.29    |
| 1  | 1  | 0.14 |     0.43    |
| 2  | 1  | 0.14 |     0.57    |
| 3  | 2  | 0.29 |     0.86    |
| 5  | 1  | 0.14 |     1.0     |
+----+----+------+-------------+


## **2: DISTRIBUTION STATISTIQUE GROUPEE**

In [None]:
# Sample of 50 integer observations used throughout section 2.
datas = [153, 165, 160, 150, 159, 151, 163,
160, 158, 149, 154, 153, 163, 140,
158, 150, 158, 155, 163, 159, 157,
162, 160, 152, 164, 158, 153, 162,
166, 162, 165, 157, 174, 158, 171,
162, 155, 156, 159, 162, 152, 158,
164, 164, 162, 158, 156, 171, 164,
158]

In [None]:
# Sample size and extreme values; the following cells reuse N, Min and Max.
N=len(datas)
Min = min(datas)
Max = max(datas)

### 2.1: Discrétisation

#### 2.1.1: Racine carrée

In [None]:
# Square-root rule: k = sqrt(N), rounded up to get a whole number of classes.
k = np.sqrt(N)
print(k)
print(np.ceil(k))

7.0710678118654755
8.0


In [None]:
# Cross-check with NumPy's "sqrt" estimator. histogram_bin_edges returns the
# bin edges, so the length is the number of bins plus one.
len(np.histogram_bin_edges(datas,bins="sqrt"))

9

#### 2.1.2: Règle de Sturges

In [None]:
# Sturges' rule written with a base-10 logarithm: k = 1 + (10/3) * log10(N).
# 10/3 is a rounded value of 1/log10(2) (about 3.322), so the result is
# slightly larger than 1 + log2(N).
k = 10/3 * np.log10(N) + 1
print(k)
print(np.ceil(k))

6.663233347786729
7.0


In [None]:
# Cross-check with NumPy's "sturges" estimator; the length counts bin edges
# (number of bins plus one).
len(np.histogram_bin_edges(datas,bins="sturges"))

8

In [None]:
# Range of the sample and C = log2(N).
# diff / (diff / C) simplifies to C, so the displayed value is ceil(C + 2).
# NOTE(review): presumably meant to reproduce by hand the edge count NumPy
# returned in the previous cell - confirm the intended parenthesisation.
diff = Max-Min
C = np.log2(N)
np.ceil((diff / ((diff/C ))+1) + 1)

8.0

#### 2.1.3: Règle de Rice

In [None]:
# Rice rule: k = 2 * N^(1/3), rounded up to get a whole number of classes.
k = 2 * ((N)**(1/3))
print(k)
print(np.ceil(k))

7.368062997280773
8.0


In [None]:
# Cross-check with NumPy's "rice" estimator; the length counts bin edges
# (number of bins plus one).
len(np.histogram_bin_edges(datas,bins="rice"))

9

In [None]:
# With C = 2 * N^(1/3), diff / (diff / C) simplifies to C, so the displayed
# value is ceil(C + 1): the Rice number of classes plus one edge.
diff = Max-Min
C = 2 * ((N)**(1/3))
np.ceil((diff / ((diff/C ))) + 1)

9.0

#### 2.1.4: Règle de Scott

In [None]:
# Scott's rule: class width h = 3.5 * s * N^(-1/3), with s the sample
# standard deviation (ddof=1); k = range / h.
# `diff` (Max - Min) comes from an earlier cell.
std = np.std(datas,ddof=1)
k = (diff) / (3.5 * std * N**(-1/3))
print(k)
print(np.ceil(k))

5.868886052839139
6.0


In [None]:
# Cross-check with NumPy's "scott" estimator; the length counts bin edges
# (number of bins plus one).
len(np.histogram_bin_edges(datas,bins="scott"))

7

In [None]:
# Same Scott computation, but np.std is called without ddof here (population
# standard deviation), unlike the cell that uses ddof=1.
# diff / (diff / C) simplifies to C, so the displayed value is ceil(C + 1).
C = (diff) / (3.5 * np.std(datas) * N**(-1/3))
np.ceil((diff / ((diff/C ))) + 1)

7.0

#### 2.1.5: Règle de Freedman-Diaconis

In [None]:
# First and third quartiles, and the interquartile range EIQ = Q3 - Q1.
Q1 = np.quantile(datas,0.25)
Q3 = np.quantile(datas,0.75)
EIQ = Q3-Q1
print(EIQ)

7.5


In [None]:
# Freedman-Diaconis rule: class width h = 2 * EIQ / N^(1/3); k = range / h.
# `diff` (Max - Min) comes from an earlier cell.
k = (diff)/ (2*(EIQ/(N)**(1/3)))
print(k)
print(np.ceil(k))

8.35047139691821
9.0


#### 2.1.6: Intervalles de classes

In [None]:
def intervalles_classes(X,k):
  """Split the range of ``X`` into ``k`` equal-width, left-closed classes.

  The classes start at ``min(X)`` and are contiguous; each one is read as the
  half-open interval ``[lower; upper[``. The table of classes is printed and
  the bounds are returned for further use.

  Args:
    X: non-empty sequence of numeric observations.
    k: number of classes (>= 1). A float such as the result of ``np.ceil``
      is accepted and truncated to an integer.

  Returns:
    List of ``[lower, upper]`` pairs, one per class.

  Raises:
    ValueError: if ``k`` is smaller than 1.
  """
  k = int(k)
  if k < 1:
    raise ValueError("k must be at least 1")

  borne_inf = min(X)
  etendue = max(X) - borne_inf

  # floor(...) + 1 equals ceil(...) whenever the range is not an exact
  # multiple of k. When it is (including a zero range), the extra unit keeps
  # max(X) strictly inside the last half-open class instead of on its
  # excluded upper bound.
  largeur = np.floor(etendue / k) + 1

  I = []
  for _ in range(k):
    borne_sup = borne_inf + largeur
    I.append([borne_inf, borne_sup])
    borne_inf = borne_sup

  tab = PrettyTable()
  tab.field_names=(["ci"])
  for bmin, bmax in I:
    tab.add_row(['['+str(bmin)+";"+str(bmax)+'['])

  print(tab)
  return I


In [None]:
# Build 7 classes over `datas` and keep their bounds in IC for the grouped
# frequency table further down.
IC = intervalles_classes(datas,7)

+---------------+
|       ci      |
+---------------+
|  [140;145.0[  |
| [145.0;150.0[ |
| [150.0;155.0[ |
| [155.0;160.0[ |
| [160.0;165.0[ |
| [165.0;170.0[ |
| [170.0;175.0[ |
+---------------+


#### 2.1.7: Outils de discrétisation

In [None]:
def nombre_de_classes(X):
  """Print the number of classes suggested by five usual binning rules.

  The rules are: square root, Sturges, Rice, Scott and Freedman-Diaconis.
  Each raw value is rounded up with ``np.ceil`` before being displayed.

  Args:
    X: sequence of numeric observations.

  Returns:
    None. The comparison table is written to standard output.
  """
  N = len(X)
  mini, maxi = min(X), max(X)
  diff = maxi - mini

  # Dispersion measures needed by Scott (sample standard deviation) and
  # Freedman-Diaconis (interquartile range).
  std = np.std(X, ddof=1)
  premier_quartile = np.quantile(X, 0.25)
  troisieme_quartile = np.quantile(X, 0.75)
  EIQ = troisieme_quartile - premier_quartile

  # (label, raw number of classes) for every rule, in display order.
  regles = [
    ("Racine carrée", np.sqrt(N)),
    ("Règle de Sturges", 10/3 * np.log10(N) + 1),
    ("Règle de Rice", 2 * ((N)**(1/3))),
    ("Règle de Scott", (diff) / (3.5 * std * N**(-1/3))),
    ("Règle de Freedman-Diaconis", (diff)/ (2*(EIQ/(N)**(1/3)))),
  ]

  tab = PrettyTable()
  tab.field_names=["Regles","k"]
  for libelle, valeur in regles:
    tab.add_row([libelle, np.ceil(valeur)])

  print(tab)

In [None]:
# Compare the number of classes given by each rule for `datas`.
nombre_de_classes(datas)

+----------------------------+-----+
|           Regles           |  k  |
+----------------------------+-----+
|       Racine carrée        | 8.0 |
|      Règle de Sturges      | 7.0 |
|       Règle de Rice        | 8.0 |
|       Règle de Scott       | 6.0 |
| Règle de Freedman-Diaconis | 9.0 |
+----------------------------+-----+


### 2.2: Effectifs et fréquences

In [None]:
def tableau_effectifs_frequences_groupees(X,IC):
  """Print the frequency table of a series grouped into classes.

  For every class of ``IC`` the table shows its label ``[lower;upper[``, the
  number of observations with ``lower <= x < upper``, the relative frequency
  and the cumulative relative frequency (both rounded to 2 decimals).

  Args:
    X: sequence of numeric observations.
    IC: sequence of ``[lower, upper]`` class bounds.

  Returns:
    None. The table is written to standard output.
  """
  # Count per class: lower bound included, upper bound excluded.
  ni = [
    sum(1 for x in X if classe[0] <= x < classe[1])
    for classe in IC
  ]

  # Relative frequencies
  total = len(X)
  fi = [effectif / total for effectif in ni]

  # Cumulative relative frequencies (running sum of fi)
  ficum = []
  cumul = 0
  for frequence in fi:
    cumul += frequence
    ficum.append(cumul)

  tab = PrettyTable()
  tab.field_names=(["ci","ni","fi","fi cumulées"])
  for classe, effectif, frequence, cumulee in zip(IC, ni, fi, ficum):
    libelle = "[%s;%s[" % (classe[0], classe[1])
    tab.add_row([libelle, effectif, round(frequence, 2), round(cumulee, 2)])

  print(tab)

    


In [None]:
# Grouped frequency table of `datas` over the classes stored in IC.
tableau_effectifs_frequences_groupees(datas,IC)

+---------------+----+------+-------------+
|       ci      | ni |  fi  | fi cumulées |
+---------------+----+------+-------------+
|  [140;145.0[  | 1  | 0.02 |     0.02    |
| [145.0;150.0[ | 1  | 0.02 |     0.04    |
| [150.0;155.0[ | 9  | 0.18 |     0.22    |
| [155.0;160.0[ | 17 | 0.34 |     0.56    |
| [160.0;165.0[ | 16 | 0.32 |     0.88    |
| [165.0;170.0[ | 3  | 0.06 |     0.94    |
| [170.0;175.0[ | 3  | 0.06 |     1.0     |
+---------------+----+------+-------------+


## **3: DESCRIPTION STATISTIQUE D'UNE VARIABLE QUALITATIVE**

### 3.1: Tableau de contingence

In [None]:
# Sample qualitative series with three categories.
dqualitative = ["Grand","Petit","Petit","Moyen","Grand","Grand"]

In [None]:
def tableau_contingence(X):
  """Print a one-row table with the count of each category found in ``X``.

  The categories are the distinct values of ``X`` in sorted order; they are
  used as column headers, and the single row holds their counts.

  Args:
    X: sequence of category labels.

  Returns:
    None. The table is written to standard output.
  """
  # The categories are taken from the argument itself, not from the global
  # sample, so the function works for any series it is given.
  modalites, effectifs = np.unique(np.asarray(X), return_counts=True)

  tab = PrettyTable()
  tab.field_names = [str(m) for m in modalites]
  tab.add_row(effectifs.tolist())
  print(tab)

In [None]:
# Print the category counts of the qualitative sample.
tableau_contingence(dqualitative)

+-------+-------+-------+
| Grand | Moyen | Petit |
+-------+-------+-------+
|   3   |   1   |   2   |
+-------+-------+-------+
