# 1. Necessary Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import ElasticNet
from sklearn.metrics import log_loss
import warnings
warnings.filterwarnings('ignore')

# 2. Reading In Data

In [None]:
# Load the four input CSVs in a fixed order: training features, test
# features, scored training targets, and the sample submission template.
train_features, test_features, train_scores, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/test_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/sample_submission.csv',
    )
)

# 3. Categorical Encoding and Normalizing

In [None]:
# Tag every row with its origin so the combined frame can be split back into
# train and test after the shared preprocessing.
train_features['WHERE'] = 'train'
test_features['WHERE'] = 'test'

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and, like append, keeps each frame's
# own index labels (so labels repeat across the train and test parts).
data = pd.concat([train_features, test_features])

# Hand-rolled one-hot encoding: each distinct value of a categorical column
# becomes a 0/1 indicator column named after the value itself.
COLS = ['cp_type', 'cp_dose']
FE = []  # names of the columns that will be fed to the model
for col in COLS:
    for mod in data[col].unique():
        FE.append(mod)
        data[mod] = (data[col] == mod).astype(int)

In [None]:
# Min-max scale the treatment time into [0, 1] and register it as a feature.
raw_time = data['cp_time']
time_low, time_high = raw_time.min(), raw_time.max()
data['cp_Time'] = (raw_time - time_low) / (time_high - time_low)
FE.append('cp_Time')

# Min-max scale every column between the first four and the last six columns
# of `data`, writing each result to a new "<name>_new" column of a copy.
# NOTE(review): the [4:-6] slice presumably skips the id/categorical columns
# at the front and WHERE + indicator columns + cp_Time at the back; that only
# holds if the two categorical columns have two distinct values each - confirm.
data_new = data.copy()
for raw_name in data.columns[4:-6]:
    raw_values = data[raw_name]
    low, high = raw_values.min(), raw_values.max()
    scaled_name = f'{raw_name}_new'
    data_new[scaled_name] = (raw_values - low) / (high - low)
    FE.append(scaled_name)

In [None]:
# Split the preprocessed frame back into its train and test parts using the
# origin tag stored in the WHERE column.
train_features, test_features = (
    data_new[data_new['WHERE'] == origin] for origin in ('train', 'test')
)

# 4. Creating Folds

In [None]:
# Assign each training row a fold id in a new integer `fold` column.
N_FOLD = 10
folds = train_scores.copy()
TARGET = folds.columns[1:]  # every column after the first one

# The group key is sig_id, so rows sharing a sig_id always land in the same
# fold. NOTE(review): if sig_id is unique per row this is effectively a
# plain deterministic K-fold split - confirm that is intended.
groups = folds['sig_id'].values
Fold = GroupKFold(n_splits=N_FOLD)
fold_splits = Fold.split(folds, folds[TARGET], groups)
for fold_number, (_, holdout_rows) in enumerate(fold_splits):
    folds.loc[holdout_rows, 'fold'] = fold_number

# The column is created by partial assignment, so cast it to int once every
# row has received its fold id.
folds['fold'] = folds['fold'].astype(int)

# 5. Single Fold

In [None]:
def run_single_model(clf, train_df, test_df, folds, features, target, fold_num=0):
    """Fit ``clf`` on all folds except ``fold_num`` and predict the rest.

    Parameters
    ----------
    clf : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    train_df, test_df : pandas.DataFrame
        Frames containing the ``features`` columns.
    folds : pandas.DataFrame
        Frame with a ``fold`` column, row-aligned (by position) with
        ``train_df`` and ``target``.
    features : list
        Names of the input columns.
    target : pandas.DataFrame
        One column per output, row-aligned (by position) with ``train_df``.
    fold_num : int, optional
        Id of the fold that is held out for validation.

    Returns
    -------
    oof : numpy.ndarray
        Shape ``(len(train_df), n_targets)``; holds the predictions for the
        held-out rows and zeros everywhere else.
    predictions : numpy.ndarray
        Shape ``(len(test_df), n_targets)``; predictions for ``test_df``.
    """
    # Boolean masks select rows by position, so the result does not depend
    # on the index labels of `folds` matching the row order of `train_df`.
    fold_ids = folds['fold'].values
    is_val = fold_ids == fold_num
    is_trn = fold_ids != fold_num

    X_all = train_df[features].values
    y_all = target.values
    # Derive the output width from the data instead of hard-coding it.
    n_targets = target.shape[1]

    clf.fit(X_all[is_trn], y_all[is_trn])

    oof = np.zeros((len(train_df), n_targets))
    # reshape keeps a squeezed single-target prediction compatible with the
    # two-dimensional output buffers.
    val_pred = np.asarray(clf.predict(X_all[is_val]), dtype=float)
    oof[is_val] = val_pred.reshape(-1, n_targets)

    # Predict on a plain array, matching the array type used for fitting.
    test_pred = np.asarray(clf.predict(test_df[features].values), dtype=float)
    predictions = test_pred.reshape(len(test_df), n_targets)
    return oof, predictions

# 6. N-Folds

In [None]:
def run_kfold_model(clf, train, test, folds, features, target, n_fold=9):
    """Run ``run_single_model`` for folds ``0 .. n_fold - 1`` and combine them.

    Parameters
    ----------
    clf : object
        Estimator passed through to ``run_single_model``; the same instance
        is re-fitted for every fold.
    train, test : pandas.DataFrame
        Frames containing the ``features`` columns.
    folds : pandas.DataFrame
        Frame with a ``fold`` column, row-aligned with ``train``.
    features : list
        Names of the input columns.
    target : pandas.DataFrame
        One column per output, row-aligned with ``train``.
    n_fold : int, optional
        Number of folds to iterate over. It should equal the number of
        distinct fold ids in ``folds``; rows whose fold id is never visited
        keep an all-zero out-of-fold prediction.

    Returns
    -------
    oof : numpy.ndarray
        Sum of the per-fold out-of-fold arrays, shape
        ``(len(train), n_targets)``.
    predictions : numpy.ndarray
        Average of the per-fold test predictions, shape
        ``(len(test), n_targets)``.
    """
    # Size the accumulators from the target frame rather than a fixed width.
    n_targets = target.shape[1]
    oof = np.zeros((len(train), n_targets))
    predictions = np.zeros((len(test), n_targets))

    for fold_ in range(n_fold):
        _oof, _predictions = run_single_model(
            clf, train, test, folds, features, target, fold_num=fold_
        )
        # Each per-fold OOF array is zero outside its held-out rows, so a
        # plain sum stitches the folds together.
        oof += _oof
        predictions += _predictions / n_fold

    return oof, predictions

# 7. Metric

In [None]:
# Lower and upper bounds for predicted probabilities; P_MAX mirrors P_MIN
# around 0.5 so both tails are trimmed by the same margin.
somthing_rate = 5e-4
P_MIN, P_MAX = somthing_rate, 1 - somthing_rate

def loss_fn(yt, yp):
    """Return the log loss of ``yp`` against ``yt`` after clipping ``yp``.

    Predictions are clamped to ``[P_MIN, P_MAX]`` before scoring, and the
    label set is passed explicitly as ``[0, 1]``.
    """
    clipped = np.clip(yp, P_MIN, P_MAX)
    return log_loss(yt, clipped, labels=[0, 1])

def metric(y_true, y_pred):
    """Return the mean of the per-column ``loss_fn`` values.

    Parameters
    ----------
    y_true : pandas.DataFrame
        True labels, one column per target.
    y_pred : pandas.DataFrame
        Predictions; must contain every column of ``y_true``.

    Returns
    -------
    float
        Average over the columns of ``y_true`` of the clipped log loss.
    """
    # Iterate over the columns of the argument rather than a module-level
    # frame, so the result depends only on what the caller passed in.
    per_target = [
        loss_fn(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float))
        for _target in y_true.columns
    ]
    return np.mean(per_target)

# 8. Running the Model and Getting Prediction

In [None]:
# Hyper-parameter grid for ElasticNet; it currently holds one combination.
for alpha1 in [0.3]:
    for l1s in [0.8]:
        print(" For alpha:",alpha1,"& l1_ratio:",l1s)
        clf = ElasticNet(alpha=alpha1, l1_ratio=l1s, tol=0.01, max_iter=5000)

        # Every column after the first one is a target to predict.
        scored_columns = train_scores.columns[1:]
        oof, predictions = run_kfold_model(
            clf,
            train_features,
            test_features,
            folds,
            FE,
            train_scores[scored_columns],
            n_fold=N_FOLD,
        )

        # Put the out-of-fold predictions into a frame shaped like the
        # targets so the metric can compare them column by column.
        train_new_scores = train_scores.copy()
        train_new_scores[scored_columns] = oof
        cv_score = metric(train_scores[scored_columns], train_new_scores[scored_columns])
        print(cv_score)

        # Fill the submission template with the averaged test predictions.
        submission[submission.columns[1:]] = predictions

# 9. Submitting the kernel

In [None]:
# Write the filled-in submission frame to disk without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)

### Please upvote if you find it useful.