In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
from pathlib import Path
from multiprocessing.dummy import Pool as ThreadPool
from collections import defaultdict
from natsort import natsorted
import tsfresh as tf

In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

pd.set_option('max_columns', None)

In [3]:
data_path = Path('data_transformed')
events_names = {0: 'Normal',
                1: 'Abrupt Increase of BSW',
                2: 'Spurious Closure of DHSV',
                3: 'Severe Slugging',
                4: 'Flow Instability',
                5: 'Rapid Productivity Loss',
                6: 'Quick Restriction in PCK',
                7: 'Scaling in PCK',
                8: 'Hydrate in Production Line'
               }
columns = ['P-PDG',
           'P-TPT',
           'T-TPT',
           'P-MON-CKP',
           'T-JUS-CKP',
           'QGL',
           'class']
rare_threshold = 0.01

In [4]:
def class_and_file_generator(data_path, real=False, simulated=False, drawn=False):
    for class_path in data_path.iterdir():
        if class_path.is_dir():
            class_code = int(class_path.stem)
            for instance_path in class_path.iterdir():
                if (instance_path.suffix == '.csv'):
                    if (simulated and instance_path.stem.startswith('SIMULATED')) or \
                       (drawn and instance_path.stem.startswith('DRAWN')) or \
                       (real and (not instance_path.stem.startswith('SIMULATED')) and \
                       (not instance_path.stem.startswith('DRAWN'))):
                        yield class_code, instance_path

In [5]:
real_instances = list(class_and_file_generator(data_path, real=True, simulated=False, drawn=False))
simulated_instances = list(class_and_file_generator(data_path, real=False, simulated=True, drawn=False))

In [6]:
#simulirani primjeri za znacajku 'well' imaju vrijednost -1

def load_instance(instance):
    class_code, instance_path = instance
    try:
        well, instance_id = instance_path.stem.split('_')
        if 'WELL' in well:
            well = well.split('-')[1]
        if 'SIMULATED' in well:
            well = '-1'
        df = pd.read_csv(instance_path, index_col='timestamp', parse_dates=['timestamp'])
        assert (df.columns == columns).all(), "invalid columns in the file {}: {}".format(str(instance_path), str(df.columns.tolist()))
        df['class_code'] = class_code
        df['well'] = well
        df['instance_id'] = instance_id
        df = df[['class_code', 'well', 'instance_id'] + columns]
        return df
    except Exception as e:
        raise Exception('error reading file {}: {}'.format(instance_path, e))
        
def load_instances(instances):
    pool = ThreadPool()
    all_df = []
    try:
        for df in pool.imap_unordered(load_instance, instances):
            all_df.append(df)
    finally:
        pool.terminate()
    return all_df

In [7]:
#ucitavanje stvarnih i simuliranih csv primjera u df-ove

instances = real_instances + simulated_instances
data = load_instances(instances)

In [8]:
#pretvorba identifikatora dogadaja i oznake busotine u integer tipove podataka

for df in data:
    df['instance_id'] = pd.to_numeric(df['instance_id'])
    df['well'] = pd.to_numeric(df['well'])
    
print(df.dtypes)
data[0].head()

class_code       int64
well             int64
instance_id      int64
P-PDG          float64
P-TPT          float64
T-TPT          float64
P-MON-CKP      float64
T-JUS-CKP      float64
QGL            float64
class          float64
dtype: object


Unnamed: 0_level_0,class_code,well,instance_id,P-PDG,P-TPT,T-TPT,P-MON-CKP,T-JUS-CKP,QGL,class
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-09-25 01:00:30,3,14,20170925010031,23423380.0,13703300.0,56.558224,2935319.0,36.081306,1.625385,3.0
2017-09-25 01:00:40,3,14,20170925010031,23422210.0,13700140.0,56.56401,2989604.0,36.061686,1.541092,3.0
2017-09-25 01:00:50,3,14,20170925010031,23421040.0,13697910.0,56.569884,3042945.0,36.04362,1.513173,3.0
2017-09-25 01:01:00,3,14,20170925010031,23420340.0,13696000.0,56.576066,3068551.0,36.044957,1.496669,3.0
2017-09-25 01:01:10,3,14,20170925010031,23419500.0,13693530.0,56.581859,3050824.0,36.050174,1.411177,3.0


In [9]:
#koliko je null vrijednosti po znacajci u skupu podataka

x = None
for dataFrame in data:
    if x is None:
        x = dataFrame.isna().sum(axis = 0)
    else:
        x += dataFrame.isna().sum(axis = 0)
print(x)

class_code           0
well                 0
instance_id          0
P-PDG              545
P-TPT              567
T-TPT           581249
P-MON-CKP       112190
T-JUS-CKP       170341
QGL            3717542
class              446
dtype: int64


In [10]:
#pretvorba null vrijednosti u konacne vrijednosti uporabom funkcije impute(...)
#dokumentacija https://tsfresh.readthedocs.io/en/latest/api/tsfresh.utilities.html#tsfresh.utilities.dataframe_functions.impute

import tsfresh as tf
print(data[0].dtypes)
imputed_data = []
for df in data:
    df2 = tf.utilities.dataframe_functions.impute(df.loc[:, df.columns != 'timestamp'])
    imputed_df = df2
    imputed_data.append(imputed_df)

class_code       int64
well             int64
instance_id      int64
P-PDG          float64
P-TPT          float64
T-TPT          float64
P-MON-CKP      float64
T-JUS-CKP      float64
QGL            float64
class          float64
dtype: object




In [11]:
#provjera jesu li uklonjene sve null vrijednosti

x = None
for df in imputed_data:
    if x is None:
        x = df.isna().sum(axis = 0)
    else:
        x += df.isna().sum(axis = 0)
print(x)

class_code     0
well           0
instance_id    0
P-PDG          0
P-TPT          0
T-TPT          0
P-MON-CKP      0
T-JUS-CKP      0
QGL            0
class          0
dtype: int64


In [12]:
#stvaranje prozora, svaki prozor sadrzi 30 redaka

data_windowed = []

for df in imputed_data:
    for i in range(df.shape[0]//90):
        window = df.iloc[i*90:(i+1)*90]
        data_windowed.append(window)

In [13]:
print(imputed_data[666]['class'].unique())
print(imputed_data[666]['class_code'].unique())

[0.]
[0]


In [14]:
len(data_windowed)

52064

In [15]:
def threeW_feature_extraction(data):
    all_feats = []
    for i, df in enumerate(data):
        if not i % 1000:
            print(i, end=' ')
        all_feats.append(my_extract_features(df))
    return all_feats

In [16]:
def my_extract_features(df):
    df = df[['P-PDG', 'P-TPT', 'T-TPT', 'P-MON-CKP', 'T-JUS-CKP', 'QGL', 'class']]

    maximum = tf.feature_extraction.feature_calculators.maximum(df).add_prefix('max_')
    minimum = tf.feature_extraction.feature_calculators.minimum(df).add_prefix('min_')
    mean_change = tf.feature_extraction.feature_calculators.mean_change(df)
    avg_scnd_derivative = tf.feature_extraction.feature_calculators.mean_second_derivative_central(df)

    mean = []
    variance = []
    skewness = []
    kurtosis = []
    abs_fft = []
    median = []
    quantile = []

    for i in ['P-PDG', 'P-TPT', 'T-TPT', 'P-MON-CKP', 'T-JUS-CKP', 'QGL', 'class']:
        median.append(tf.feature_extraction.feature_calculators.median(df[i]))
        quantile.append(tf.feature_extraction.feature_calculators.quantile(df[i], 0.2))
        mean.append(list(tf.feature_extraction.feature_calculators.fft_aggregated(df[i], [{'aggtype':'centroid'}]))[0][1])
        variance.append(list(tf.feature_extraction.feature_calculators.fft_aggregated(df[i], [{'aggtype':'variance'}]))[0][1])
        skewness.append(list(tf.feature_extraction.feature_calculators.fft_aggregated(df[i], [{'aggtype':'skew'}]))[0][1])
        kurtosis.append(list(tf.feature_extraction.feature_calculators.fft_aggregated(df[i], [{'aggtype':'kurtosis'}]))[0][1])
        abs_fft.append(list(tf.feature_extraction.feature_calculators.fft_coefficient(df[i], [{'coeff':0, 'attr':'abs'}]))[0][1])

    data_max = pd.DataFrame(maximum).transpose()
    data_min = pd.DataFrame(minimum).transpose()
    data_mean = pd.DataFrame(mean).transpose().rename(columns={0:'me_P-PDG', 1:'me_P-TPT', 2:'me_T-TPT', 3:'me_P-MON-CKP', 4:'me_T-JUS-CKP', 5:'me_QGL', 6:'me_class'})
    data_variance = pd.DataFrame(variance).transpose().rename(columns={0:'var_P-PDG', 1:'var_P-TPT', 2:'var_T-TPT', 3:'var_P-MON-CKP', 4:'var_T-JUS-CKP', 5:'var_QGL', 6:'var_class'})
    data_skewness = pd.DataFrame(skewness).transpose().rename(columns={0:'sk_P-PDG', 1:'sk_P-TPT', 2:'sk_T-TPT', 3:'sk_P-MON-CKP', 4:'sk_T-JUS-CKP', 5:'sk_QGL', 6:'sk_class'})
    data_kurtosis = pd.DataFrame(kurtosis).transpose().rename(columns={0:'kt_P-PDG', 1:'kt_P-TPT', 2:'kt_T-TPT', 3:'kt_P-MON-CKP', 4:'kt_T-JUS-CKP', 5:'kt_QGL', 6:'kt_class'})
    data_abs_fft = pd.DataFrame(abs_fft).transpose().rename(columns={0:'afft_P-PDG', 1:'afft_P-TPT', 2:'afft_T-TPT', 3:'afft_P-MON-CKP', 4:'afft_T-JUS-CKP', 5:'afft_QGL', 6:'afft_class'})

    data_median = pd.DataFrame([median]).rename(columns={0:'med_P-PDG', 1:'med_P-TPT', 2:'med_T-TPT', 3:'med_P-MON-CKP', 4:'med_T-JUS-CKP', 5:'med_QGL', 6:'med_class'})
    data_quantile = pd.DataFrame([quantile]).rename(columns={0:'qtl_P-PDG', 1:'qtl_P-TPT', 2:'qtl_T-TPT', 3:'qtl_P-MON-CKP', 4:'qtl_T-JUS-CKP', 5:'qtl_QGL', 6:'qtl_class'})
    data_mean_change = pd.DataFrame(mean_change).transpose().rename(columns={0:'mc_P-PDG', 1:'mc_P-TPT', 2:'mc_T-TPT', 3:'mc_P-MON-CKP', 4:'mc_T-JUS-CKP', 5:'mc_QGL', 6:'mc_class'})
    data_asd = pd.DataFrame(avg_scnd_derivative).transpose().rename(columns={0:'asd_P-PDG', 1:'asd_P-TPT', 2:'asd_T-TPT', 3:'asd_P-MON-CKP', 4:'asd_T-JUS-CKP', 5:'asd_QGL', 6:'asd_class'})

    features = pd.concat([data_mean, data_variance, data_skewness, data_kurtosis, data_abs_fft, data_min, data_max, data_median, data_quantile, data_mean_change, data_asd], axis=1)

    #ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
    #coeff_of_variation = tf.feature_extraction.feature_calculators.variation_coefficient(df)

    #ValueError: If using all scalar values, you must pass an index
    #langevin_1 = tf.feature_extraction.feature_calculators.friedrich_coefficients(df['QGL'], [{'m':1, 'r':0.2, 'coeff':0}])
    #langevin_3 = tf.feature_extraction.feature_calculators.friedrich_coefficients(df, [{'m':3, 'r':0.2, 'coeff':1}])
    return features

    

In [17]:
#provjera jesu li uklonjene sve null vrijednosti

x = None
for df in data_windowed:
    if x is None:
        x = df.isna().sum(axis = 0)
    else:
        x += df.isna().sum(axis = 0)
print(x)

class_code     0
well           0
instance_id    0
P-PDG          0
P-TPT          0
T-TPT          0
P-MON-CKP      0
T-JUS-CKP      0
QGL            0
class          0
dtype: int64


In [18]:
df = threeW_feature_extraction(data_windowed)

0 

  return y.dot(np.arange(len(y), dtype=float)**moment) / y.sum()


50 100 150 200 250 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 1650 1700 1750 1800 1850 1900 1950 2000 2050 2100 2150 2200 2250 2300 2350 2400 2450 2500 2550 2600 2650 2700 2750 2800 2850 2900 2950 3000 3050 3100 3150 3200 3250 3300 3350 3400 3450 3500 3550 3600 3650 3700 3750 3800 3850 3900 3950 4000 4050 4100 4150 4200 4250 4300 4350 4400 4450 4500 4550 4600 4650 4700 4750 4800 4850 4900 4950 5000 5050 5100 5150 5200 5250 5300 5350 5400 5450 5500 5550 5600 5650 5700 5750 5800 5850 5900 5950 6000 6050 6100 6150 6200 6250 6300 6350 6400 6450 6500 6550 6600 6650 6700 6750 6800 6850 6900 6950 7000 7050 7100 7150 7200 7250 7300 7350 7400 7450 7500 7550 7600 7650 7700 7750 7800 7850 7900 7950 8000 8050 8100 8150 8200 8250 8300 8350 8400 8450 8500 8550 8600 8650 8700 8750 8800 8850 8900 8950 9000 9050 9100 9150 9200 9250 9300 9350 9400 9450 9500 9550 9600 9650 9700 9750 9800 9850 9900 9950 10000 10050 10100 10150 1

In [19]:
df_all = pd.concat(df, ignore_index=True)

In [64]:
df_all.shape

(52064, 77)

In [76]:
df_all

Unnamed: 0,me_P-PDG,me_P-TPT,me_T-TPT,me_P-MON-CKP,me_T-JUS-CKP,me_QGL,me_class,var_P-PDG,var_P-TPT,var_T-TPT,...,mc_T-JUS-CKP,mc_QGL,mc_class,asd_P-PDG,asd_P-TPT,asd_T-TPT,asd_P-MON-CKP,asd_T-JUS-CKP,asd_QGL,asd_class
0,0.091449,0.480035,1.522645,1.035564,0.181932,6.576472,0.0,2.251824,11.645079,35.051435,...,0.006273,-0.001981,0.0,-2.898539,4.500000,2.171169e-04,-11.627922,-9.100325e-05,-0.000028,0.0
0,0.025031,0.380836,0.410673,0.274794,0.116528,5.958092,0.0,0.608550,9.312612,9.844518,...,0.004076,0.002104,0.0,-24.954545,7.244318,-9.718182e-05,-10.044886,-3.556818e-06,-0.000354,0.0
0,0.005865,0.124705,0.756938,0.387642,0.110258,5.703256,0.0,0.133182,3.115033,18.055718,...,0.003907,-0.002238,0.0,36.534091,-44.295455,4.259091e-05,-5.702841,5.681818e-09,-0.001238,0.0
0,0.008177,0.036285,0.364539,0.249584,0.110065,5.980700,0.0,0.170156,0.846653,8.935886,...,-0.003921,0.006246,0.0,-33.261364,-46.130682,8.870455e-05,60.433523,2.357955e-05,-0.001682,0.0
0,0.175210,1.008744,2.226190,1.991986,3.706471,6.096720,0.0,3.436745,23.231825,49.323413,...,-0.176403,-0.010890,0.0,38.414773,9.704545,-1.061960e-03,900.910795,-2.838023e-03,0.002397,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.000013,0.044916,0.008385,0.001030,0.172274,,0.0,0.000325,1.062805,0.210172,...,-0.007221,0.000000,0.0,0.005682,0.169318,3.011364e-08,-0.327273,1.720455e-05,0.000000,0.0
0,0.000014,0.050164,0.006822,0.000785,0.144639,,0.0,0.000358,1.299146,0.165866,...,-0.005904,0.000000,0.0,0.000000,1.260227,-2.840909e-09,-0.065909,3.284659e-05,0.000000,0.0
0,0.000013,0.044922,0.006214,0.001060,0.174635,,0.0,0.000344,1.058092,0.156553,...,-0.007196,0.000000,0.0,0.011364,-8.008523,1.863636e-07,0.078409,1.998068e-04,0.000000,0.0
0,0.000011,0.039938,0.005578,0.002457,0.234883,,0.0,0.000284,0.964851,0.139383,...,-0.006739,0.000000,0.0,-0.017045,22.153409,-4.232955e-07,1.621591,3.689148e-04,0.000000,0.0


In [20]:
class_codes = []
for df2 in data_windowed:
    class_codes.append(df2['class_code'].unique())
df_classes = pd.DataFrame(class_codes).rename(columns = {0:'class_code'})

In [21]:
df_classes.tail()

Unnamed: 0,class_code
52059,2
52060,2
52061,2
52062,2
52063,2


In [24]:
data_windowed[3].head(30)

Unnamed: 0_level_0,class_code,well,instance_id,P-PDG,P-TPT,T-TPT,P-MON-CKP,T-JUS-CKP,QGL,class
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-09-25 01:45:30,3,14,20170925010031,24161488.0,15136973.0,53.103125,1193386.6,35.026269,1.44969,3.0
2017-09-25 01:45:40,3,14,20170925010031,24163605.0,15151287.0,53.023225,1198757.8,35.02122,1.378607,3.0
2017-09-25 01:45:50,3,14,20170925010031,24165549.0,15171095.0,52.920851,1203075.9,35.01617,1.298327,3.0
2017-09-25 01:46:00,3,14,20170925010031,24166095.0,15184504.0,52.855164,1212416.9,35.011121,1.27061,3.0
2017-09-25 01:46:10,3,14,20170925010031,24166162.0,15199568.0,52.799512,1214917.0,35.006072,1.248529,3.0
2017-09-25 01:46:20,3,14,20170925010031,24166829.0,15216823.0,52.744863,1216250.2,35.001024,1.216666,3.0
2017-09-25 01:46:30,3,14,20170925010031,24167040.0,15232434.0,52.689845,1215462.4,34.995974,1.251921,3.0
2017-09-25 01:46:40,3,14,20170925010031,24166172.0,15245427.0,52.652402,1216113.8,34.990925,1.284521,3.0
2017-09-25 01:46:50,3,14,20170925010031,24165565.0,15259790.0,52.63763,1216659.0,34.985876,1.237393,3.0
2017-09-25 01:47:00,3,14,20170925010031,24163428.0,15276994.0,52.604342,1219576.0,34.980826,1.254804,3.0


In [25]:
df_all.head()

Unnamed: 0,me_P-PDG,me_P-TPT,me_T-TPT,me_P-MON-CKP,me_T-JUS-CKP,me_QGL,me_class,var_P-PDG,var_P-TPT,var_T-TPT,var_P-MON-CKP,var_T-JUS-CKP,var_QGL,var_class,sk_P-PDG,sk_P-TPT,sk_T-TPT,sk_P-MON-CKP,sk_T-JUS-CKP,sk_QGL,sk_class,kt_P-PDG,kt_P-TPT,kt_T-TPT,kt_P-MON-CKP,kt_T-JUS-CKP,kt_QGL,kt_class,afft_P-PDG,afft_P-TPT,afft_T-TPT,afft_P-MON-CKP,afft_T-JUS-CKP,afft_QGL,afft_class,min_P-PDG,min_P-TPT,min_T-TPT,min_P-MON-CKP,min_T-JUS-CKP,min_QGL,min_class,max_P-PDG,max_P-TPT,max_T-TPT,max_P-MON-CKP,max_T-JUS-CKP,max_QGL,max_class,med_P-PDG,med_P-TPT,med_T-TPT,med_P-MON-CKP,med_T-JUS-CKP,med_QGL,med_class,qtl_P-PDG,qtl_P-TPT,qtl_T-TPT,qtl_P-MON-CKP,qtl_T-JUS-CKP,qtl_QGL,qtl_class,mc_P-PDG,mc_P-TPT,mc_T-TPT,mc_P-MON-CKP,mc_T-JUS-CKP,mc_QGL,mc_class,asd_P-PDG,asd_P-TPT,asd_T-TPT,asd_P-MON-CKP,asd_T-JUS-CKP,asd_QGL,asd_class
0,0.02227,0.018428,0.08336,2.135963,0.073821,7.120489,0.0,0.557835,0.414752,2.023835,46.143544,1.730109,124.333138,0.0,42.812177,,22.153243,4.106382,23.866981,1.780758,,2026.39898,,546.194216,20.517004,637.63581,5.6678,,2107944000.0,1231681000.0,5120.369773,226793200.0,3242.213724,71.938753,270.0,23396920.0,13652481.0,56.558224,2155294.7,35.794288,0.0,3.0,23482174.0,13726937.0,57.227508,3068551.3,36.385653,1.633145,3.0,23419108.5,13684227.5,56.780088,2442188.25,36.028804,0.54273,3.0,23400809.0,13659270.2,56.659295,2332873.14,35.883943,0.004332,3.0,660.644195,-258.086142,0.006273,-8764.319226,0.00342,-0.018263,0.0,51.714015,-5.570076,-5.2e-05,-368.785732,0.000173,0.000479,0.0
1,0.381236,0.615268,0.444707,2.993926,0.101528,8.14669,0.0,7.678399,13.151762,7.612511,64.30426,2.453556,132.43948,0.0,10.147563,8.019541,10.626397,3.342626,20.112556,1.643413,,121.062514,74.57083,133.40511,14.045253,450.362932,5.366474,,2159340000.0,1118209000.0,4589.166574,147442700.0,3279.670068,46.145508,270.0,22869665.0,11424941.0,42.854535,1200219.9,35.962461,0.0,3.0,24466544.0,13676589.0,57.110023,2175734.1,36.756317,2.533159,3.0,24284489.5,12416122.5,51.28267,1552636.3,36.482696,0.077642,3.0,23481783.2,11735792.2,46.304652,1285821.26,36.219872,0.0,3.0,170.629213,-8274.58427,-0.02206,-9282.851685,-0.004876,0.02523,0.0,54.482955,219.090909,0.000876,-13.088068,-0.000143,-0.001218,0.0
2,0.231358,1.15693,0.318461,0.833006,0.21591,3.306367,0.0,5.576964,27.207773,7.294126,19.19442,5.290091,68.853343,0.0,13.167773,5.697768,11.481575,6.895282,13.625189,3.12603,,194.535251,37.603879,148.646015,54.556361,207.329525,12.636991,,2142372000.0,1278676000.0,5038.124884,113742400.0,3184.876076,143.578729,270.0,23476699.0,12964007.0,53.210616,1183666.9,35.031318,1.187865,3.0,24159372.0,15119397.0,58.220286,1340303.2,35.948,2.239493,3.0,23774610.5,14349974.5,56.087553,1270674.6,35.304474,1.534748,3.0,23586553.8,13530434.8,54.96497,1197871.42,35.121191,1.364097,3.0,7462.033708,24217.865169,-0.0231,-1490.891011,-0.0103,-0.008929,0.0,126.954545,16.0625,-0.001134,-27.332955,5.3e-05,0.000396,0.0
3,0.066194,0.068514,0.049711,4.122415,0.374295,2.823389,0.0,1.629458,1.520922,0.966903,83.069169,8.91606,58.077568,0.0,24.939578,25.498613,29.819388,2.679864,10.374862,3.295454,,690.259544,728.651124,1027.048052,9.777997,121.16384,14.138161,,2166173000.0,1382759000.0,4728.944855,146491800.0,3199.667524,114.036273,270.0,23972336.0,15041208.0,51.957234,1193386.6,34.874796,0.999077,3.0,24167040.0,15507161.0,53.103125,2789044.1,36.770125,1.451167,3.0,24080461.0,15394131.0,52.532584,1473734.75,35.33885,1.26941,3.0,23978896.2,15246458.2,52.24289,1250936.32,34.949521,1.193868,3.0,-2116.426966,-1076.011236,-0.001977,13474.533708,0.018313,1.7e-05,0.0,-37.545455,-307.835227,0.000344,226.927841,-0.000218,0.00065,0.0
4,0.190839,0.844351,0.553246,2.927248,1.085989,2.71579,0.0,4.687469,20.084129,13.237162,47.619965,24.198986,59.537106,0.0,14.496008,6.756835,8.442291,3.555787,6.058372,3.496575,,234.309268,52.182283,80.670704,16.799143,42.416689,15.232295,,2123371000.0,1258225000.0,4904.908877,274295300.0,2924.196237,133.354305,270.0,23360972.0,13392532.0,52.817554,1239993.0,27.484402,1.039456,3.0,23961413.0,15015627.0,56.668225,5132579.5,36.612609,1.798676,3.0,23555319.5,13833923.5,54.383077,2726934.2,33.808151,1.51096,3.0,23405253.6,13467635.0,52.917554,2058594.7,28.139058,1.272232,3.0,-6045.359551,-16927.426966,0.042157,16482.791011,-0.052125,-0.002765,0.0,85.227273,149.545455,0.000177,-2249.094318,0.000939,-0.000242,0.0


In [27]:
df_all.isna().sum(axis=0)

me_P-PDG         8655
me_P-TPT           64
me_T-TPT         6514
me_P-MON-CKP     1200
me_T-JUS-CKP     1819
                 ... 
asd_T-TPT           0
asd_P-MON-CKP       0
asd_T-JUS-CKP       0
asd_QGL             0
asd_class           0
Length: 77, dtype: int64

In [29]:
df_all = df_all.fillna(df_all.mean())

In [30]:
df_all.isna().sum(axis=0)

me_P-PDG         0
me_P-TPT         0
me_T-TPT         0
me_P-MON-CKP     0
me_T-JUS-CKP     0
                ..
asd_T-TPT        0
asd_P-MON-CKP    0
asd_T-JUS-CKP    0
asd_QGL          0
asd_class        0
Length: 77, dtype: int64