# Hypothesis about solvent start temperature

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Search-for-highly-correlated-features-in-X:" data-toc-modified-id="Search-for-highly-correlated-features-in-X:-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Search for highly correlated features in X:</a></span></li><li><span><a href="#pandas_profiling" data-toc-modified-id="pandas_profiling-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>pandas_profiling</a></span></li></ul></div>

In [1]:
# Import necessary packages
import pandas as pd
pd.options.display.max_columns = None

Import saved DataFrame usind pandas method read_pickle (since it is faster than reading a .csv file):

In [2]:
df = pd.read_pickle('./Data/process_data_df10.pkl')

Remove rows where `qc_salzrckhalt == 0` or `qc_durchfluss == 0`.

In [3]:
mask = (df.qc[['qc_salzrckhalt','qc_durchfluss']] > 0).all(axis=1)
df = df.loc[mask,:]

df.shape

(117559, 343)

Select only features from the `pa` process step:

In [4]:
df_pa = df['pa']

In [5]:
# df_pa.head()

In [6]:
def num_cols(df):
    ''' Shows enumerated dictionary of column 
        names of a pandas DataFrame '''
    
    d = {df.columns.get_loc(c):c for c in df.columns}

    return d

Create a enumerated dictionary with column names from `pa` process step:

In [7]:
dict_pa = num_cols(df_pa)
dict_pa

{0: 'pa_datum',
 1: 'pa_beschichtete_rollenlange_m',
 2: 'pa_bahn-geschwindigkeit_m/min',
 3: 'pa_bad-wechsel_m-pda',
 4: 'pa_bad-wechsel_hw1',
 5: 'pa_bad-wechsel_hw2',
 6: 'pa_bad-wechsel_chlor',
 7: 'pa_bad-wechsel_hw3',
 8: 'pa_raum_temperatur_start_c',
 9: 'pa_temperatur_m-pda-bad_c',
 10: 'pa_cm-pda_0m',
 11: 'pa_cm-pda_500m',
 12: 'pa_cm-pda_1000m',
 13: 'pa_cm-pda_1500m',
 14: 'pa_cm-pda_2000m',
 15: 'pa_cm-pda_2500m',
 16: 'pa_cm-pda_3000m',
 17: 'pa_cm-pda_3500m',
 18: 'pa_cm-pda_4000m',
 19: 'pa_ce-capro_lactam_%',
 20: 'pa_temperatur_n-decan-lsg_chem_vorbereitung_start_c',
 21: 'pa_ctmc_richtwert_%',
 22: 'pa_ctmc_%',
 23: 'pa_temperatur_alkali-lsg_chem_vorbereitung_start_c',
 24: 'pa_temperatur_chlorbad_start_c',
 25: 'pa_chlorkonzentration_0m_ppm',
 26: 'pa_chlorkonzentration_500m_ppm',
 27: 'pa_chlorkonzentration_1000m_ppm',
 28: 'pa_chlorkonzentration_1500m_ppm',
 29: 'pa_chlorkonzentration_2000m_ppm',
 30: 'pa_chlorkonzentration_2500m_ppm',
 31: 'pa_chlorkonzentration_

Select only features related to temperature and humidity:

In [8]:
dict_pa_sub = {k:v for k,v in dict_pa.items() if ('temp' in v) or ('feucht' in v) or (v.startswith('f_'))}

print("{} features selected:".format(len(dict_pa_sub)))
dict_pa_sub

46 features selected:


{8: 'pa_raum_temperatur_start_c',
 9: 'pa_temperatur_m-pda-bad_c',
 20: 'pa_temperatur_n-decan-lsg_chem_vorbereitung_start_c',
 23: 'pa_temperatur_alkali-lsg_chem_vorbereitung_start_c',
 24: 'pa_temperatur_chlorbad_start_c',
 50: 'pa_raum_feuchte_start_%',
 51: 'pa_amin-trockner_temperatur_danfugt_c',
 52: 'pa_amin-trockner_feuchtigkeit_danfugt_bs_0m_%',
 53: 'pa_amin-trockner_feuchtigkeit_danfugt_bs_1000m_%',
 54: 'pa_amin-trockner_feuchtigkeit_danfugt_bs_2000m_%',
 55: 'pa_amin-trockner_feuchtigkeit_danfugt_bs_3000m_%',
 56: 'pa_amin-trockner_feuchtigkeit_danfugt_bs_4000m_%',
 57: 'pa_amin-trockner_feuchtigkeit_danfugt_as_0m',
 58: 'pa_amin-trockner_feuchtigkeit_danfugt_as_1000m',
 59: 'pa_amin-trockner_feuchtigkeit_danfugt_as_2000m',
 60: 'pa_amin-trockner_feuchtigkeit_danfugt_as_3000m',
 61: 'pa_amin-trockner_feuchtigkeit_danfugt_as_4000m',
 62: 'pa_amin-trockner_temperatur_cofely_c',
 63: 'pa_amin-trockner_feuchtigkeit_cofely_0m_%',
 64: 'pa_amin-trockner_feuchtigkeit_cofely_600m_

Save the selected columns in the variable X:

In [9]:
X = df.pa.iloc[:,list(dict_pa_sub)]
X.sample(5)

Unnamed: 0,pa_raum_temperatur_start_c,pa_temperatur_m-pda-bad_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_c,pa_temperatur_chlorbad_start_c,pa_raum_feuchte_start_%,pa_amin-trockner_temperatur_danfugt_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_0m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_1000m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_2000m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_3000m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_4000m_%,pa_amin-trockner_feuchtigkeit_danfugt_as_0m,pa_amin-trockner_feuchtigkeit_danfugt_as_1000m,pa_amin-trockner_feuchtigkeit_danfugt_as_2000m,pa_amin-trockner_feuchtigkeit_danfugt_as_3000m,pa_amin-trockner_feuchtigkeit_danfugt_as_4000m,pa_amin-trockner_temperatur_cofely_c,pa_amin-trockner_feuchtigkeit_cofely_0m_%,pa_amin-trockner_feuchtigkeit_cofely_600m_%,pa_amin-trockner_feuchtigkeit_cofely_1200m_%,pa_amin-trockner_feuchtigkeit_cofely_1800m_%,pa_amin-trockner_feuchtigkeit_cofely_2400m_%,pa_amin-trockner_feuchtigkeit_cofely_3000m_%,pa_amin-trockner_feuchtigkeit_cofely_3600m_%,pa_amin-trockner_feuchtigkeit_cofely_4200m_%,pa_vertikale_feuchte_oben_start_%,pa_vertikale_feuchte_oben_mitte_%,pa_vertikale_feuchte_oben_ende_%,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_low_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_high_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_low_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_high_c,pa_amin-trockner_temperatur_danfugt_ref_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_low_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_high_%,pa_amin-trockner_feuchtigkeit_danfugt_as_ref_low,pa_amin-trockner_feuchtigkeit_danfugt_as_ref_high,pa_amin-trockner_feuchtigkeit_cofely_ref_low_%,pa_amin-trockner_feuchtigkeit_cofely_ref_high_%,pa_vertikale_feuchte_oben_ref_low_%,pa_vertikale_feuchte_oben_ref_high_%,f_danfugt_bs_median,f_danfugt_as_median,f_codfely_median,temp_median
12916,24.0,22.0,26.0,23.0,20.0,25.0,30.0,51.0,59.0,59.0,60.0,60.0,48.0,56.0,56.0,57.0,57.0,25.0,75.9,74.9,76.1,74.8,784.9,75.6,75.2,75.2,25.0,36.0,36.0,24.0,28.0,21.0,22.5,30.0,57.0,58.0,57.0,58.0,79.0,81.0,40.0,42.0,59.0,56.0,75.4,22.2
1739,22.0,19.0,27.3,22.8,20.0,24.0,35.0,56.0,60.0,60.0,60.0,60.0,56.0,60.0,61.0,61.0,59.0,25.0,83.7,85.1,82.8,83.6,84.2,82.1,82.8,82.0,37.0,41.0,40.0,24.0,28.0,21.0,22.5,35.0,57.0,60.0,57.0,60.0,77.0,80.0,30.0,50.0,60.0,60.0,83.2,21.7
92502,24.0,17.0,26.69,22.97,20.0,23.0,30.0,62.0,59.0,59.0,60.0,60.0,58.0,60.0,61.0,59.0,60.0,25.0,76.4,76.8,75.8,74.3,76.0,73.9,74.7,75.4,35.0,37.0,38.0,22.0,24.0,21.0,22.5,30.0,56.0,58.0,56.0,58.0,79.0,81.0,39.0,42.0,60.0,60.0,75.6,21.8
11541,22.6,21.0,26.26,22.71,20.0,25.0,30.0,62.0,62.0,63.0,62.0,63.0,61.0,62.0,62.0,62.0,63.0,25.0,82.4,83.7,83.0,84.2,82.8,82.5,83.0,82.6,42.0,44.0,44.0,24.0,28.0,21.0,22.5,35.0,57.0,60.0,57.0,60.0,77.0,80.0,30.0,50.0,62.0,62.0,82.9,21.7
24544,24.0,21.0,28.73,24.06,20.0,38.0,35.0,68.0,63.0,60.0,60.0,60.0,59.0,60.0,60.0,60.0,60.0,25.0,82.7,83.6,83.0,83.0,83.6,83.4,83.1,81.0,44.0,45.0,45.0,24.0,28.0,21.0,22.5,35.0,57.0,60.0,57.0,60.0,77.0,80.0,30.0,50.0,60.0,60.0,83.05,23.1


In [10]:
y = df.qc[['qc_salzrckhalt']]
y.sample(5)

Unnamed: 0,qc_salzrckhalt
93735,99.737125
35913,99.769125
84156,99.807849
29104,99.431178
57550,99.570717


### Search for highly correlated features in X:

Rearch and remove highly correlated feature in X, since they will influence badly our model.
First, let us replace all the 0's in X fo NaN, since they don't seem to make sense:

In [11]:
X.replace(0,float('nan'),inplace=True)

In [12]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import os

In [13]:
# Creates folder "./Figures/" and display a message
if not os.path.isdir('./Figures'):
    %mkdir "Figures"
    if os.path.isdir('./Figures'):
        print('Folder created')


Heatmaps were created with:

    plt.figure(figsize=(40, 30))
    p = sns.heatmap(X.corr(method='pearson'),annot=True,linewidth=.5)
    plt.savefig('./Figures/pa_heatmap_pearson.pdf')

    plt.figure(figsize=(40, 30))
    p = sns.heatmap(X.corr(method='spearman'),annot=True,linewidth=.5)
    plt.savefig('./Figures/pa_heatmap_spearman.pdf')

In [14]:
import numpy as np

In [15]:
def get_high_corr(df,method="pearson",threshold=0.95):
    '''Returns highly correlated features in a DataFrame
       based on a threshold value'''
    # Create correlation matrix
    corr_matrix = df.corr(method).abs()

    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))

    # Find index of feature columns with correlation greater than 0.95
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    
    return to_drop

In [16]:
p90 = get_high_corr(X,method='pearson',threshold=.90)
s90 = get_high_corr(X,method='spearman',threshold=.90)

    p95 = get_high_corr(X,method='pearson',threshold=.95)
    s95 = get_high_corr(X,method='spearman',threshold=.95)

In [17]:
s90.remove('pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_high_c')

In [18]:
s90

['pa_amin-trockner_feuchtigkeit_danfugt_bs_2000m_%',
 'pa_amin-trockner_feuchtigkeit_danfugt_bs_3000m_%',
 'pa_amin-trockner_feuchtigkeit_danfugt_bs_4000m_%',
 'pa_amin-trockner_feuchtigkeit_danfugt_as_2000m',
 'pa_amin-trockner_feuchtigkeit_danfugt_as_3000m',
 'pa_amin-trockner_feuchtigkeit_danfugt_as_4000m',
 'pa_vertikale_feuchte_oben_mitte_%',
 'pa_vertikale_feuchte_oben_ende_%',
 'pa_amin-trockner_temperatur_danfugt_ref_c',
 'pa_amin-trockner_feuchtigkeit_danfugt_as_ref_low',
 'pa_amin-trockner_feuchtigkeit_danfugt_as_ref_high',
 'pa_amin-trockner_feuchtigkeit_cofely_ref_low_%',
 'pa_amin-trockner_feuchtigkeit_cofely_ref_high_%',
 'pa_vertikale_feuchte_oben_ref_high_%',
 'f_danfugt_bs_median',
 'f_danfugt_as_median',
 'f_codfely_median']

In [19]:
X.drop(columns=s90,inplace=True)

In [20]:
X.describe()

Unnamed: 0,pa_raum_temperatur_start_c,pa_temperatur_m-pda-bad_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_c,pa_temperatur_chlorbad_start_c,pa_raum_feuchte_start_%,pa_amin-trockner_temperatur_danfugt_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_0m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_1000m_%,pa_amin-trockner_feuchtigkeit_danfugt_as_0m,pa_amin-trockner_feuchtigkeit_danfugt_as_1000m,pa_amin-trockner_temperatur_cofely_c,pa_amin-trockner_feuchtigkeit_cofely_0m_%,pa_amin-trockner_feuchtigkeit_cofely_600m_%,pa_amin-trockner_feuchtigkeit_cofely_1200m_%,pa_amin-trockner_feuchtigkeit_cofely_1800m_%,pa_amin-trockner_feuchtigkeit_cofely_2400m_%,pa_amin-trockner_feuchtigkeit_cofely_3000m_%,pa_amin-trockner_feuchtigkeit_cofely_3600m_%,pa_amin-trockner_feuchtigkeit_cofely_4200m_%,pa_vertikale_feuchte_oben_start_%,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_low_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_high_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_low_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_high_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_low_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_high_%,pa_vertikale_feuchte_oben_ref_low_%,temp_median
count,116692.0,117151.0,116535.0,114129.0,117151.0,116602.0,116934.0,116746.0,117063.0,116746.0,117063.0,116301.0,117215.0,117099.0,117049.0,116696.0,115085.0,114496.0,114106.0,112369.0,117165.0,117081.0,117081.0,117081.0,117081.0,117559.0,117559.0,117559.0,117235.0
mean,23.926255,19.200664,27.389272,23.001624,20.134775,31.875142,33.284588,59.336715,58.644823,57.319435,56.840394,27.021695,76.985111,76.924623,76.978775,83.624296,76.988609,77.054047,78.117391,77.045941,40.616549,23.720091,27.396606,21.0,22.478212,56.81474,59.517417,33.472682,21.950075
std,1.181166,5.028491,2.235485,0.733455,0.430151,11.013958,3.011694,8.812938,12.291398,17.502484,4.837487,70.294937,21.263701,20.428956,22.345168,234.394333,21.46398,24.688016,37.946631,23.86152,51.516428,0.661737,1.3852,0.0,0.102076,2.500461,2.163595,4.648474,0.769706
min,19.0,13.0,16.0,18.0,17.0,18.0,25.0,25.0,44.0,7.0,45.0,20.0,7.4,20.0,7.58,7.52,20.0,9.0,7.2,19.9,15.0,22.0,24.0,21.0,22.0,50.0,55.0,30.0,17.9
25%,23.0,18.0,26.4,22.7,20.0,24.0,30.0,53.0,55.0,51.0,53.0,25.0,74.8,74.8,74.8,74.9,74.8,74.8,74.8,74.8,35.0,24.0,28.0,21.0,22.5,57.0,58.0,30.0,21.5
50%,24.0,19.0,27.8,23.0,20.0,28.0,35.0,59.0,59.0,58.0,58.0,25.0,76.0,76.1,76.0,75.9,76.0,76.0,76.0,76.2,39.0,24.0,28.0,21.0,22.5,57.0,60.0,30.0,22.0
75%,25.0,20.0,29.0,23.33,20.0,38.0,35.0,64.0,61.0,61.0,60.0,25.0,82.4,82.2,82.3,82.2,82.3,82.3,82.4,82.3,44.0,24.0,28.0,21.0,22.5,57.0,60.0,39.0,22.4
max,40.0,203.0,32.0,29.06,22.4,223.0,65.0,91.0,450.0,603.0,73.0,2525.0,737.0,756.9,792.0,8301.0,784.9,836.0,883.3,831.0,4346.0,24.0,28.0,21.0,22.5,65.0,67.0,40.0,26.0


And Data Cleaning continues...
We spotted some columns which have unreasonable values.

Following strategy can be used to see one plot after the other (using a iterator):

```python
kk = iter((num_cols(X).items()))
```

```python
# next(kk)
```    

```python
idkk, textkk = next(kk)

print(idkk)
print(textkk)

X.iloc[:,[idkk]].plot(kind='line',style='o')
plt.title('({}) {}'.format(idkk,textkk))
plt.legend("")
plt.show()
```

```python
dict_X = num_cols(X)
dict_X
```

After analysing graphically each feature, we created the following dictionary defining the course of action to be taken

In [21]:
dict_clean={
    'pa_raum_temperatur_start_c':                                 30 ,
    'pa_temperatur_m-pda-bad_c':                                  50 ,
    'pa_raum_feuchte_start_%':                                    100,
    'pa_amin-trockner_feuchtigkeit_danfugt_bs_1000m_%':           100,
    'pa_amin-trockner_feuchtigkeit_danfugt_as_0m':                100,
    'pa_amin-trockner_temperatur_cofely_c':                       100,
    'pa_amin-trockner_feuchtigkeit_cofely_0m_%':                  100,
    'pa_amin-trockner_feuchtigkeit_cofely_600m_%':                100,
    'pa_amin-trockner_feuchtigkeit_cofely_1200m_%':               100,
    'pa_amin-trockner_feuchtigkeit_cofely_1800m_%':               100,
    'pa_amin-trockner_feuchtigkeit_cofely_2400m_%':               100,
    'pa_amin-trockner_feuchtigkeit_cofely_3000m_%':               100,
    'pa_amin-trockner_feuchtigkeit_cofely_3600m_%':               100,
    'pa_amin-trockner_feuchtigkeit_cofely_4200m_%':               100,
    'pa_vertikale_feuchte_oben_start_%':                          100,
    'pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_low_c': "drop",
    'pa_temperatur_alkali-lsg_chem_vorbereitung_start_ref_high_c':"drop"
};

Remove values above the threshold defined in the `dict_clean` and drops the respective columns:

In [22]:
for k,v in dict_clean.items():
    if type(v) is int:
        mask = X[k]>v
        # substitute outliers for NaN
        X.loc[mask,:] = float('nan')
        
    elif v=="drop":
        X.drop(columns=k,inplace=True)        

Let us have a quick glimpse into X:

In [23]:
X.describe()

Unnamed: 0,pa_raum_temperatur_start_c,pa_temperatur_m-pda-bad_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_c,pa_temperatur_alkali-lsg_chem_vorbereitung_start_c,pa_temperatur_chlorbad_start_c,pa_raum_feuchte_start_%,pa_amin-trockner_temperatur_danfugt_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_0m_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_1000m_%,pa_amin-trockner_feuchtigkeit_danfugt_as_0m,pa_amin-trockner_feuchtigkeit_danfugt_as_1000m,pa_amin-trockner_temperatur_cofely_c,pa_amin-trockner_feuchtigkeit_cofely_0m_%,pa_amin-trockner_feuchtigkeit_cofely_600m_%,pa_amin-trockner_feuchtigkeit_cofely_1200m_%,pa_amin-trockner_feuchtigkeit_cofely_1800m_%,pa_amin-trockner_feuchtigkeit_cofely_2400m_%,pa_amin-trockner_feuchtigkeit_cofely_3000m_%,pa_amin-trockner_feuchtigkeit_cofely_3600m_%,pa_amin-trockner_feuchtigkeit_cofely_4200m_%,pa_vertikale_feuchte_oben_start_%,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_low_c,pa_temperatur_n-decan-lsg_chem_vorbereitung_start_ref_high_c,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_low_%,pa_amin-trockner_feuchtigkeit_danfugt_bs_ref_high_%,pa_vertikale_feuchte_oben_ref_low_%,temp_median
count,114854.0,115313.0,114791.0,112480.0,115313.0,114764.0,115096.0,114908.0,115225.0,114908.0,115225.0,114463.0,115377.0,115261.0,115211.0,114858.0,113247.0,112658.0,112268.0,110531.0,115327.0,115243.0,115243.0,115721.0,115721.0,115721.0,115397.0
mean,23.916037,19.074233,27.386475,23.001336,20.135241,31.726798,33.288081,59.357174,58.323644,56.894662,56.835366,24.72959,76.409832,76.429342,76.36495,76.388838,76.420435,76.341142,76.283528,76.3781,39.479974,23.719679,27.395807,56.828709,59.524512,33.468402,21.950775
std,1.090846,1.581524,2.241238,0.736078,0.431811,9.57894,3.013904,8.823868,5.311509,8.149848,4.836357,5.187193,9.808239,9.503969,9.690661,9.626725,9.561492,9.855188,10.063117,9.730946,6.326966,0.662204,1.386032,2.482466,2.15553,4.646863,0.770629
min,19.0,13.0,16.0,18.0,17.0,18.0,25.0,25.0,44.0,7.0,45.0,20.0,7.4,20.0,7.58,7.52,20.0,9.0,7.2,19.9,15.0,22.0,24.0,50.0,55.0,30.0,17.9
25%,23.0,18.0,26.4,22.7,20.0,24.0,30.0,53.0,55.0,51.0,53.0,25.0,74.8,74.8,74.8,74.9,74.8,74.8,74.8,74.8,35.0,24.0,28.0,57.0,58.0,30.0,21.5
50%,24.0,19.0,27.79,23.0,20.0,28.0,35.0,59.0,59.0,58.0,58.0,25.0,76.0,76.1,76.0,75.9,76.0,76.0,76.0,76.1,39.0,24.0,28.0,57.0,60.0,30.0,22.0
75%,25.0,20.0,29.0,23.33,20.0,38.0,35.0,64.0,61.0,61.0,60.0,25.0,82.3,82.2,82.3,82.2,82.2,82.3,82.4,82.3,44.0,24.0,28.0,57.0,60.0,39.0,22.4
max,27.0,25.0,32.0,29.06,22.4,83.5,65.0,91.0,79.0,81.0,73.0,83.0,91.9,86.1,86.5,87.7,93.3,87.4,93.2,86.2,58.0,24.0,28.0,65.0,67.0,40.0,26.0


Looks clean 😎

### pandas_profiling

In [24]:
from pandas_profiling import ProfileReport

ModuleNotFoundError: No module named 'pandas_profiling'

In [None]:
profile = ProfileReport(X, title='Pandas Profiling Report', html={'style':{'full_width':True}})

In [None]:
profile