# <center>Python: Porównanie algorytmów</center>
---
## 1. Konstrukcja modeli i mierników parametrów
### 1.1 Biblioteki i edycja danych
#### > Biblioteki

In [1]:
import pandas as pd
from statistics import mean, stdev
import statsmodels.api as sm
import statsmodels.formula.api as smf
from patsy import dmatrices
import time
%load_ext line_profiler
%load_ext memory_profiler

#### > Wczytanie i edycja danych

In [2]:
data_set = pd.read_csv("cows.csv", delimiter=";")

cols = ["btn3a1", "lactation"]
for col in cols:
    data_set[col] = data_set[col].astype("category")
    
print(data_set.head())

# Formuła
formula = "milk~btn3a1+lactation"

# # Macierz
y, X = dmatrices(formula, data=data_set, return_type="dataframe")
groups = data_set["cow.id"]

   cow.id btn3a1 lactation  milk  fat
0       1      1         1  7770  358
1       2      1         1  7341  376
2       3      1         1  6998  294
3       3      1         2  8564  331
4       3      1         3  8621  330


### 1.2 Funkcje
#### > Funkcja tworząca modele

In [3]:
def LMM(model):
    if model == "formula":
        LMM_formula = smf.mixedlm(formula, data_set, groups=groups)
        LMMF_formula = LMM_formula.fit()
        return LMMF_formula

    elif model == "matrix":
        LMM_matrix = sm.MixedLM(y, X, groups=groups)
        LMMF_matrix = LMM_matrix.fit()
        return LMMF_matrix

#### > Funkcja sprawdzająca czas

In [4]:
def check_time(model, n):
    times = []

    for i in range(n):
        start_time = time.time()
        LMM(model)
        end_time = time.time()
        times.append(round(end_time - start_time, 4))
    return times

## 2. Symulacje
### 2.1 Podsumowanie modeli
#### > statsmodel "formula"

In [5]:
LMM("formula").summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,milk
No. Observations:,1000,Method:,REML
No. Groups:,409,Scale:,1252913.7714
Min. group size:,1,Log-Likelihood:,-8651.8138
Max. group size:,4,Converged:,Yes
Mean group size:,2.4,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,6699.448,81.078,82.630,0.000,6540.538,6858.358
btn3a1[T.2],-244.084,235.197,-1.038,0.299,-705.062,216.894
lactation[T.2],1307.037,84.712,15.429,0.000,1141.005,1473.069
lactation[T.3],1800.538,102.283,17.604,0.000,1600.067,2001.008
lactation[T.4],1669.271,177.492,9.405,0.000,1321.394,2017.148
Group Var,1240396.526,147.330,,,,


#### > statsmodel "matrix"

In [6]:
LMM("matrix").summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,milk
No. Observations:,1000,Method:,REML
No. Groups:,409,Scale:,1252913.7714
Min. group size:,1,Log-Likelihood:,-8651.8138
Max. group size:,4,Converged:,Yes
Mean group size:,2.4,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,6699.448,81.078,82.630,0.000,6540.538,6858.358
btn3a1[T.2],-244.084,235.197,-1.038,0.299,-705.062,216.894
lactation[T.2],1307.037,84.712,15.429,0.000,1141.005,1473.069
lactation[T.3],1800.538,102.283,17.604,0.000,1600.067,2001.008
lactation[T.4],1669.271,177.492,9.405,0.000,1321.394,2017.148
Group Var,1240396.526,147.330,,,,


### 2.2 Sprawdzenie czasu konstrukcji modelu
#### > formula - 100 razy

In [7]:
formula_times = check_time("formula", 100)

pd.DataFrame.to_clipboard(pd.DataFrame(formula_times))

print(mean(formula_times),
      stdev(formula_times))

0.676473 0.07436293995932342


In [16]:
%timeit LMM("formula")

663 ms ± 24.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Timer unit: 1e-07 s

Total time: 1.08764 s
File: <ipython-input-3-55d36bd81f50>
Function: LMM at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def LMM(model):
     2         1        127.0    127.0      0.0      if model == "formula":
     3         1     326454.0 326454.0      3.0          LMM_formula = smf.mixedlm(formula, data_set, groups=groups)
     4         1   10549835.0 10549835.0     97.0          LMMF_formula = LMM_formula.fit()
     5         1         28.0     28.0      0.0          return LMMF_formula
     6                                           
     7                                               elif model == "matrix":
     8                                                   LMM_matrix = sm.MixedLM(y, X, groups=groups)
     9                                                   LMMF_matrix = LMM_matrix.fit()
    10                                                   return LMMF_matrix

#### > matrix - 100 razy

In [8]:
matrix_times = check_time("matrix", 100)

pd.DataFrame.to_clipboard(pd.DataFrame(matrix_times))

print(mean(matrix_times),
      stdev(matrix_times))

0.624238 0.028944317074498238


In [10]:
%timeit LMM("matrix")

629 ms ± 24.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Timer unit: 1e-07 s

Total time: 0.973522 s
File: <ipython-input-3-55d36bd81f50>
Function: LMM at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def LMM(model):
     2         1         27.0     27.0      0.0      if model == "formula":
     3                                                   LMM_formula = smf.mixedlm(formula, data_set, groups=groups)
     4                                                   LMMF_formula = LMM_formula.fit()
     5                                                   return LMMF_formula
     6                                           
     7         1          9.0      9.0      0.0      elif model == "matrix":
     8         1     169398.0 169398.0      1.7          LMM_matrix = sm.MixedLM(y, X, groups=groups)
     9         1    9565761.0 9565761.0     98.3          LMMF_matrix = LMM_matrix.fit()
    10         1         26.0     26.0      0.0          return LMMF_matrix

### 2.3 Sprawdzenie zużywanej pamięci RAM
#### > Z użyciem "IPython Magic Commands"
##### >> statsmodel "formula"

In [11]:
%memit LMM("formula")

peak memory: 132.26 MiB, increment: 0.02 MiB


Line #    Mem usage    Increment   Line Contents
================================================
    22     97.2 MiB     97.2 MiB   @profile
    23                             def LMM(model):
    24     97.2 MiB      0.0 MiB       if model == "formula":
    25     97.5 MiB      0.3 MiB           LMM_formula = smf.mixedlm(formula, data_set, groups=groups)
    26     98.1 MiB      0.6 MiB           LMMF_formula = LMM_formula.fit()
    27     98.1 MiB      0.0 MiB           return LMMF_formula
    28                             
    29                                 elif model == "matrix":
    30                                     LMM_matrix = sm.MixedLM(y, X, groups=groups)
    31                                     LMMF_matrix = LMM_matrix.fit()
    32                                     return LMMF_matrix

##### >> statsmodel "matrix"

In [12]:
%memit LMM("matrix")

peak memory: 132.01 MiB, increment: 0.00 MiB


Line #    Mem usage    Increment   Line Contents
================================================
    22     97.0 MiB     97.0 MiB   @profile
    23                             def LMM(model):
    24     97.0 MiB      0.0 MiB       if model == "formula":
    25                                     LMM_formula = smf.mixedlm(formula, data_set, groups=groups)
    26                                     LMMF_formula = LMM_formula.fit()
    27                                     return LMMF_formula
    28                             
    29     97.0 MiB      0.0 MiB       elif model == "matrix":
    30     97.3 MiB      0.2 MiB           LMM_matrix = sm.MixedLM(y, X, groups=groups)
    31     97.9 MiB      0.6 MiB           LMMF_matrix = LMM_matrix.fit()
    32     97.9 MiB      0.0 MiB           return LMMF_matrix

#### > Z użyciem biblioteki "memory_profiler" w programie PyCharm
##### >> statsmodel "formula"
![formula_cows](https://i.imgur.com/FSQdDFl.png)

##### >> statsmodel "matrix"
![matrix_cows](https://i.imgur.com/mb2n6Kf.png)