In [202]:
import pandas as pd
import os

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error

import category_encoders as ce

pd.options.display.max_columns = 1999

In [192]:
def custom_metric(actuals, forecast, avg_volume):
    """
    This function aims to compute the Custom Accuracy Metric
    for the Novartis Datathon, 3rd edition.

    Given the actuals followed by the forecast and the avg_volume
    of the brand, it will compute the metric score.

    Keyword parameters:
        actuals (float vector): Real value of Y
        forecast (float vector): Volume forecast
        avg_volume (float): Average monthly volume of the 12 months
                            prior to the generic entry.

    Returns:
        custom_metric: Uncertainty Metric score (%)
    """

    # Compute the first part of the equation
    # (custom MAPE with Average volume)
    custom_mape = sum(abs(actuals - forecast)) / (24 * avg_volume)

    # Compute the second part of the equation
    # (custom 6-first-months MAPE with Average volume)
    six_month_mape = \
        abs(sum(actuals[:6]) - sum(forecast[:6])) / (6 * avg_volume)

    # Compute the third part of the equation
    # (custom 6-months MAPE with Average volume)
    twelve_month_mape = \
        abs(sum(actuals[6:12]) - sum(forecast[6:12])) / (6 * avg_volume)

    # Compute the fourth part of the equation
    # (custom 12-months MAPE with Average volume)
    last_month_mape = \
        abs(sum(actuals[12:]) - sum(forecast[12:])) / (12 * avg_volume)

    # Compute the custom metric
    custom_metric = 0.5 * custom_mape + 0.3 * six_month_mape + \
        0.1 * (twelve_month_mape + last_month_mape)

    return custom_metric * 100

In [176]:
generics_count = pd.read_csv('./data/gx_num_generics.csv')
package = pd.read_csv('./data/gx_package.csv')
gx_volume = pd.read_csv('./data/gx_volume.csv')

In [177]:
data_merged = pd.read_csv('/Users/jakob/Desktop/dt_merged_w.csv')

In [178]:
data_merged.drop(['i', 'max_mon'], axis=1, inplace=True)

In [179]:
data_merged

Unnamed: 0,brand,therap,country,A,B,C,D,package,num_generics,test,-137,-136,-135,-134,-133,-132,-131,-130,-129,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84,-83,-82,-81,-80,-79,-78,-77,-76,-75,-74,-73,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,-62,-61,-60,-59,-58,-57,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
0,brand_1,Nervous_system,country_12,,4.086695,,95.913305,PILL,25,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.257179,0.240887,0.245431,0.265500,0.271535,0.269382,0.292642,0.280513,0.349966,0.321104,0.354295,0.346548,0.362649,0.331070,0.332588,0.379997,0.365481,0.355142,0.398845,0.394533,0.402781,0.441539,0.413772,0.421545,0.454930,0.375371,0.447824,0.457890,0.419393,0.465527,0.453720,0.413584,0.460827,0.473180,0.458033,0.489217,0.515069,0.425046,0.488183,0.487834,0.478158,0.525930,0.484787,0.482062,0.582125,0.526825,0.538536,0.578013,0.611379,0.543496,0.587805,0.597072,0.600953,0.606445,0.596334,0.577363,0.690244,0.642154,0.674841,0.701900,0.699218,0.661779,0.691958,0.680635,0.618279,0.792606,0.713479,0.710594,0.781600,0.736703,0.801588,0.815690,0.755903,0.733564,0.731491,0.835934,0.788389,0.790931,0.849151,0.778444,0.797366,0.872732,0.862792,0.827001,0.939524,0.824263,0.853856,0.935625,0.843503,0.880230,0.947375,0.850931,0.926276,0.938256,0.930648,0.907464,1.000050,0.862519,0.962146,1.008478,0.923436,1.001614,1,0.878691,0.962668,0.891506,0.881312,0.904497,0.925172,0.811468,0.867441,,,,,,,,,,,,,,,,
1,brand_1,Nervous_system,country_15,,11.585173,,88.414827,PILL,1,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.200011,0.216883,0.211548,0.222075,0.226352,0.222892,0.229677,0.207884,0.230063,0.226941,0.254367,0.246689,0.259456,0.257793,0.247549,0.281729,0.283804,0.289429,0.285395,0.274977,0.283306,0.301125,0.304233,0.321527,0.340147,0.315829,0.342348,0.366696,0.346089,0.395349,0.357416,0.343548,0.403344,0.398547,0.388712,0.429662,0.448091,0.415969,0.441439,0.450975,0.439731,0.490278,0.441230,0.420488,0.499743,0.478556,0.468876,0.510814,0.530022,0.515400,0.549341,0.541493,0.569853,0.611951,0.533752,0.516822,0.608966,0.562472,0.594641,0.615041,0.595201,0.629248,0.642849,0.621760,0.666585,0.674910,0.603535,0.616325,0.669552,0.619674,0.712108,0.643620,0.693117,0.708722,0.662332,0.737301,0.719263,0.724828,0.745017,0.676221,0.743307,0.765590,0.787500,0.725760,0.822745,0.792577,0.779209,0.844497,0.807113,0.794157,0.852204,0.773442,0.838274,0.847267,0.863149,0.852256,0.934323,0.850743,0.929645,0.944801,0.870354,1,0.743884,0.409105,0.444838,0.390535,0.386130,0.372340,0.379942,0.343170,0.362080,0.363910,0.339834,0.390581,,,,,,,,,,,,
2,brand_1,Nervous_system,country_16,,10.043531,1.563480,88.392989,PILL,1,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.521929,0.526968,0.676595,0.547675,0.546253,0.693114,0.541719,0.549259,0.713876,0.573855,0.588542,0.717103,0.582936,0.583797,0.743144,0.624525,0.608420,0.769735,0.620508,0.635487,0.828613,0.675365,0.697784,0.868524,0.703350,0.718334,0.932559,0.766190,0.794312,0.927201,0.817035,0.833912,1.064670,0.847064,0.860956,1.100817,0.858268,0.874259,1.118506,0.893225,0.896336,1.112634,0.872829,0.852014,1.090750,0.848933,0.883467,1.080291,0.835198,0.854452,1.058370,0.885785,0.865769,1.087348,0.886521,0.867290,1.122895,0.891389,0.924267,1.093231,0.834805,0.901628,1.099494,0.904477,0.880898,1.127815,0.834653,0.830471,1.045800,0.851805,0.842025,1.023511,0.824160,0.805676,1.035161,0.843599,0.837667,1.096532,0.826876,0.810435,1.035800,0.846223,0.851383,1.011700,0.821868,0.810888,1.051147,0.856488,0.855713,1.093033,0.803803,0.804970,1.043515,0.846890,0.844801,1.070668,0.827463,0.852305,1.080665,0.856027,0.873582,1.111108,0.819792,0.739706,1,0.825363,0.368165,0.312206,0.263998,0.256900,0.310057,0.234849,,,,,,,,,,,,,,,,,
3,brand_1,Nervous_system,country_3,,9.278268,,90.721732,PILL,6,True,,,,,,,,,,,,,,,,,,,,,,,,,,0.194241,0.184216,0.191696,0.207474,0.209736,0.216892,0.224689,0.212407,0.238714,0.227775,0.242394,0.265321,0.275703,0.254385,0.255631,0.298330,0.287530,0.301338,0.316194,0.316041,0.329685,0.371140,0.368723,0.392734,0.435481,0.367601,0.418357,0.448030,0.416039,0.465198,0.451240,0.429233,0.482511,0.503626,0.482554,0.513969,0.553800,0.467560,0.528391,0.555994,0.530081,0.584911,0.561212,0.553415,0.656239,0.629283,0.615855,0.671257,0.700589,0.609225,0.670059,0.674873,0.681398,0.724276,0.701120,0.680110,0.774033,0.732375,0.795223,0.781348,0.796242,0.752222,0.795409,0.794578,0.789432,0.796380,0.795492,0.760121,0.822062,0.788621,0.807564,0.825640,0.850526,0.793695,0.784293,0.888803,0.813697,0.833862,0.873519,0.800261,0.870396,0.877366,0.871355,0.838898,0.944261,0.823813,0.838935,0.931632,0.851098,0.901393,0.927388,0.845385,0.913941,0.949193,0.908081,0.903785,0.984149,0.839359,0.905364,0.995138,0.882065,0.990188,0.964346,0.898113,0.998199,1.001539,0.927672,1.015602,1.053943,0.901284,0.998042,1,0.910117,0.648818,0.536236,0.519108,0.507535,0.420470,0.367643,,,,,,,,,,,,,,,,,
4,brand_1,Nervous_system,country_4,,42.956357,57.043643,,PILL,10,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.828680,0.814487,0.780479,0.792083,0.855327,0.937001,0.861403,0.749954,0.858642,0.836828,0.869246,0.902235,0.950022,0.881153,0.795527,0.991066,0.858890,0.821321,0.786986,0.703346,0.707736,0.765323,0.707620,0.758053,0.805834,0.716150,0.795653,0.798649,0.734042,0.863232,0.760215,0.733819,0.820320,0.813795,0.755238,0.785699,0.866928,0.799118,0.834793,0.858886,0.869611,1.018256,0.787610,0.798007,0.966674,0.913579,0.906671,1.003233,1.052966,1.053018,1.084887,1.008585,1.080176,1.160475,0.984357,0.995271,1.123008,1.074124,1.208745,1.112470,1.049020,1.030346,1.043078,1.013239,1.008906,1.075931,0.948237,0.965711,1.036982,0.977934,1.043413,1.015096,1.081313,1.022123,0.985429,1.044586,0.979530,0.984567,0.985175,0.912029,0.960495,0.974673,0.954764,0.914345,1.044017,0.972273,0.943607,1.012778,0.943851,1.004999,0.925252,0.827726,0.872825,0.944956,0.950848,0.893295,1.038247,0.935976,0.979530,1.020947,0.918332,1,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1073,brand_97,Nervous_system,country_3,,3.230206,,96.769794,PILL,15,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.278830,0.262723,0.296279,0.321481,0.327062,0.336364,0.364148,0.342993,0.399816,0.368206,0.399197,0.418342,0.426638,0.381076,0.404664,0.465970,0.457823,0.471639,0.499097,0.480128,0.498583,0.524427,0.507757,0.505753,0.553420,0.462196,0.528078,0.555902,0.514932,0.571762,0.568857,0.532592,0.597750,0.603161,0.585501,0.619147,0.656269,0.551425,0.636721,0.672302,0.649999,0.707607,0.694019,0.669808,0.791079,0.749206,0.735553,0.785723,0.806555,0.696591,0.779442,0.793247,0.804047,0.839925,0.829791,0.783146,0.901279,0.846492,0.908046,0.881378,0.909013,0.849825,0.907798,0.901229,0.911572,0.933477,0.936623,0.883047,0.957888,0.905991,0.941550,0.963960,0.974989,0.901637,0.905354,1.011967,0.939192,0.943646,1.005799,0.903136,0.987959,0.987432,0.986643,0.933802,1.028633,0.905491,0.929830,1.020613,0.951708,0.978350,1.024554,0.919054,0.983852,1,0.971551,0.547735,0.293776,0.195542,0.187954,0.189458,0.161222,0.174399,0.161251,0.144400,0.155134,0.151991,0.140499,0.145498,0.149996,0.125855,0.139104,0.140436,0.131542,0.140854,0.131551,0.127771,0.134235,0.128957
1074,brand_97,Nervous_system,country_4,,17.941044,82.058956,,PILL,20,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.747052,0.737176,0.736507,0.772617,0.795678,0.813888,0.700641,0.661800,0.737880,0.645957,0.693504,0.667696,0.706271,0.679694,0.632845,0.735117,0.745296,0.778979,0.780653,0.773979,0.744803,0.821558,0.744554,0.707555,0.829263,0.724966,0.734913,0.748107,0.685531,0.787964,0.698653,0.686441,0.756176,0.757078,0.749703,0.779210,0.855077,0.721485,0.767572,0.796843,0.749955,0.876440,0.734742,0.736918,0.865117,0.776357,0.756049,0.785650,0.828850,0.756584,0.799870,0.832114,0.914291,1.153709,0.911456,0.901806,0.957712,0.902968,1.039676,1.029734,0.432620,0.360176,0.328030,0.296672,0.302196,0.346181,0.403512,0.423689,0.488321,0.464181,0.496627,0.536444,0.602432,0.586430,0.558770,0.620500,0.681040,0.698565,0.732899,0.695529,0.767075,0.800138,0.792064,0.787864,0.867347,0.796180,0.811301,0.903944,0.903403,0.929731,0.905845,0.894523,0.903735,0.947352,1,0.875032,0.723650,0.529131,0.490876,0.470746,0.414931,0.313423,0.264259,0.237854,0.271985,0.238526,0.223983,0.226995,0.236521,0.203944,0.214295,0.218815,0.206536,0.224186,0.184314,0.176929,0.193770,0.185495,0.193057
1075,brand_97,Nervous_system,country_7,,1.334484,0.000106,98.665410,OTHER,9,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.574325,0.520969,0.552434,0.605620,0.572347,0.565927,0.620880,0.603707,0.631355,0.579670,0.688964,0.647366,0.708083,0.572599,0.645112,0.703662,0.693622,0.630278,0.709416,0.684829,0.689897,0.740166,0.759905,0.718264,0.806526,0.614380,0.794372,0.761798,0.733736,0.748192,0.830553,0.715248,0.800516,0.813635,0.787150,0.847083,0.934745,0.668122,0.855146,0.856249,0.790176,0.859983,0.793297,0.810247,0.909372,0.867206,0.886590,0.901868,0.963731,0.733327,0.914279,0.914531,0.900080,0.911454,0.919101,0.864593,0.986675,0.836133,1.009178,0.969499,1.075774,0.808649,1.057483,0.962460,0.996757,0.960004,0.982917,0.949736,0.987008,0.919098,0.971163,0.964851,1.026309,0.693565,0.916416,0.953169,0.853644,0.842566,0.855557,0.860976,0.872592,0.867606,0.870799,0.834324,0.933505,0.768126,0.878317,0.893382,0.856588,0.802447,0.951566,0.877539,0.915023,0.845410,0.868023,1,0.766395,0.594823,0.785095,0.790879,0.653322,0.656302,0.773826,0.704578,0.737375,,,,,,,,,,,,,,,
1076,brand_98,Anti_infectives,country_1,,0.169352,,99.830648,PILL,1,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.737004,0.836206,0.826538,0.877540,0.885946,0.846154,0.902480,0.726215,0.810004,0.767830,0.849236,0.803139,0.903321,0.884405,0.879781,1.002802,0.912428,0.885666,0.947597,0.876419,0.910607,0.984868,0.885946,0.904722,0.957545,0.874737,1.015413,0.992154,0.917753,0.975340,0.927000,0.882724,0.967633,0.931484,0.872215,0.882864,0.916632,0.865350,0.974639,0.952641,0.891131,0.964971,0.940591,0.874877,1.007566,0.882303,0.825977,0.883144,0.885946,0.902480,0.945635,0.978142,0.965252,1,0.927280,0.810845,0.935267,0.774835,0.855121,0.865350,0.774275,0.850778,0.863248,0.875438,0.825557,0.875158,0.850077,0.751436,0.690486,0.721732,0.712204,0.644809,0.722152,0.739246,0.721872,0.812106,0.742889,0.724814


In [180]:
data_merged = data_merged[data_merged['test'] == False]

In [181]:
data_merged[['A', 'B', 'C', 'D']] = data_merged[['A', 'B', 'C', 'D']].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [182]:
X = data_merged.iloc[:, :9]
y = data_merged.iloc[:, -24]

In [183]:
encoder = ce.TargetEncoder(cols=['brand'])
encoder.fit(X, y)
X = encoder.transform(X)

In [184]:
X = pd.get_dummies(X)

In [185]:
X

Unnamed: 0,brand,A,B,C,D,num_generics,therap_Anti_infectives,therap_Antineoplastic_and_immunology,therap_Cardiovascular_Metabolic,therap_Dermatology,therap_Endocrinology_and_Metabolic_Disease,therap_Haematology,therap_Muscoskeletal_Rheumatology_and_Osteology,therap_Nervous_system,therap_Obstetrics_Gynaecology,therap_Other,therap_Parasitology,therap_Respiratory_and_Immuno_inflammatory,therap_Sensory_organs,therap_Systemic_Hormones,country_country_1,country_country_10,country_country_11,country_country_12,country_country_14,country_country_15,country_country_16,country_country_2,country_country_3,country_country_4,country_country_5,country_country_7,country_country_8,country_country_9,package_CREAM,package_EYE_DROP,package_INJECTION,package_OTHER,package_PATCH,package_PILL
6,0.806737,0.0,36.170513,0.000000,63.829487,17,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
7,0.855763,0.0,1.015697,0.000000,98.984303,6,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
8,0.855763,0.0,0.008023,0.000000,99.991977,9,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
9,0.855763,0.0,3.722347,0.000000,96.277653,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
10,0.855763,0.0,4.681257,1.406766,93.911977,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1072,0.782186,0.0,1.745928,0.000000,98.254072,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1073,0.782186,0.0,3.230206,0.000000,96.769794,15,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
1074,0.782186,0.0,17.941044,82.058956,0.000000,20,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
1076,0.806737,0.0,0.169352,0.000000,99.830648,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [186]:
y

6       0.986533
7       0.866219
8       0.923390
9       0.889393
10      0.753812
          ...   
1072    0.282406
1073    0.971551
1074    0.875032
1076    0.927280
1077    0.870178
Name: 0, Length: 887, dtype: float64

In [187]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [197]:
regressor = DecisionTreeRegressor(random_state=0, criterion='mae')
regressor.fit(X_train, y_train)

DecisionTreeRegressor(criterion='mae', random_state=0)

In [203]:
y_pred = regressor.predict(X_test)
mean_absolute_error(y_test, y_pred)

0.15723506237419027