In [64]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import TensorDataset,DataLoader


In [65]:
#import necessary libraries
from sklearn.metrics import r2_score
from scipy.stats import pearsonr,spearmanr
from sklearn.metrics import mean_squared_error,mean_absolute_error

#calculate the predicted scores and print it
def scores(actuall_labels, predicted_labels):
    print('Pearson Score: ',pearsonr(actuall_labels,predicted_labels))
    print('R2_Score: ',r2_score(actuall_labels,predicted_labels))
    print('Spearmanr Score: ',spearmanr(actuall_labels,predicted_labels))
    print('Mean Squared Error(MSE): ',mean_squared_error(actuall_labels,predicted_labels))
    print('Mean Absolute Error(MAE): ',mean_absolute_error(actuall_labels,predicted_labels))


In [66]:
train=pd.read_csv('../input/lcp-single-features/single_train.csv',index_col=[0])
test=pd.read_csv('../input/lcp-single-features/single_test.csv',index_col=[0])
val=pd.read_csv('../input/lcp-single-features/single_trail.csv',index_col=[0])

In [67]:
train_x=train.drop(columns=['complexity'])
train_y=train['complexity']

In [68]:
test_x=test.drop(columns=['complexity'])
test_y=test['complexity']

In [69]:
val_x=val.drop(columns=['complexity'])
val_y=val['complexity']

In [70]:
trainx=np.array(train_x)
trainy=np.array(train_y)
testx=np.array(test_x)
testy=np.array(test_y)
valx=np.array(val_x)
valy=np.array(val_y)

In [71]:
from sklearn.ensemble import AdaBoostRegressor
abr = AdaBoostRegressor(random_state=0, n_estimators=100)
abr.fit(trainx, trainy)
pred1=abr.predict(testx)
val1=abr.predict(valx)
scores(testy,pred1)

Pearson Score:  (0.7282476734819661, 2.4970142942305025e-152)
R2_Score:  0.525312111875363
Spearmanr Score:  SpearmanrResult(correlation=0.6794448839232191, pvalue=3.5664023467923117e-125)
Mean Squared Error(MSE):  0.0076828592743114175
Mean Absolute Error(MAE):  0.06854407684931382


In [72]:
from sklearn.ensemble import BaggingRegressor
br = BaggingRegressor(random_state=0, n_estimators=100)
br.fit(trainx, trainy)
pred2=br.predict(testx)
val2=br.predict(valx)
scores(testy,pred2)

Pearson Score:  (0.7339902825219308, 6.448664230497495e-156)
R2_Score:  0.5380820274225202
Spearmanr Score:  SpearmanrResult(correlation=0.6990615011920812, pvalue=1.996802131309206e-135)
Mean Squared Error(MSE):  0.007476177227964598
Mean Absolute Error(MAE):  0.06731752529670194


In [73]:
from sklearn.ensemble import GradientBoostingRegressor
gbr =  GradientBoostingRegressor(random_state=19, n_estimators=100)
gbr.fit(trainx, trainy)
pred3=gbr.predict(testx)
val3=gbr.predict(valx)
scores(testy,pred3)

Pearson Score:  (0.7398624271653936, 1.1029380343797352e-159)
R2_Score:  0.5462761316286369
Spearmanr Score:  SpearmanrResult(correlation=0.7100449178537698, pvalue=1.5367627681660304e-141)
Mean Squared Error(MSE):  0.0073435550333192864
Mean Absolute Error(MAE):  0.06569798774817318


In [74]:
from sklearn import linear_model
lasso = linear_model.Lasso(alpha=0.0005)
lasso.fit(trainx, trainy)
pred4=lasso.predict(testx)
val4=lasso.predict(valx)
scores(testy,pred4)

Pearson Score:  (0.7325321613179145, 5.361863870580975e-155)
R2_Score:  0.535035847144351
Spearmanr Score:  SpearmanrResult(correlation=0.7033477974183024, pvalue=8.871557240236104e-138)
Mean Squared Error(MSE):  0.007525479885535696
Mean Absolute Error(MAE):  0.06743123490022414


In [75]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [76]:

class NN(nn.Module):
    def __init__(self,inp_size,l1_size,l2_size,l3_size):
        super(NN, self).__init__()
        self.layer1=nn.Linear(inp_size,l1_size)
        self.layer2=nn.Linear(l1_size,l2_size)
        self.layer3=nn.Linear(l2_size,l3_size)
        
        self.lin=nn.Linear(l3_size,1)
        self.dropout1=nn.Dropout(0.3)
        self.dropout2=nn.Dropout(0.2)
        self.relu=nn.ReLU()
        self.sigmoid=nn.Sigmoid()
        
        
    def forward(self,inp:Tensor)->Tensor:
        inp=self.layer1(inp)
        inp=self.sigmoid(inp)
        inp=self.dropout1(inp)
        inp=self.layer2(inp)
        inp=self.sigmoid(inp)
        inp=self.dropout2(inp)
        inp=self.layer3(inp)
        inp=self.sigmoid(inp)
        inp=self.lin(inp)
        inp=self.sigmoid(inp)
        return inp
        
        
    

In [77]:
NBATCH=64

In [78]:
train_losses = []
val_losses = []
def train_model(train,test,model,n_epochs):
    optimizer=torch.optim.Adam(model.parameters(),lr=0.0001)
    criterion=nn.MSELoss()
    train_loader = DataLoader(train, batch_size=NBATCH, shuffle=True, drop_last=True, pin_memory=True, num_workers=2)
    val_loader = DataLoader(test, batch_size=NBATCH, shuffle=False, drop_last=False, num_workers=2)
    trainSteps = len(train_loader.dataset) // NBATCH
    valSteps = len(val_loader.dataset) // NBATCH
    for epoch in range(1,n_epochs+1):
        totalTrainLoss = 0
        totalValLoss = 0
        model.train()
        for batch,(data,target) in enumerate(train_loader,1):
            optimizer.zero_grad()
            pred=model(data)
            pred=torch.flatten(pred)
            loss=criterion(pred,target)
            loss.backward()
            totalTrainLoss+=loss
            optimizer.step()
        
        model.eval()
        for batch,(data,target) in enumerate(val_loader,1):

            with torch.no_grad():
                pred=model(data)
                pred=torch.flatten(pred)
                loss=criterion(pred,target)
                
                totalValLoss+=loss
                
        avgTrainLoss = totalTrainLoss / trainSteps
        avgValLoss = totalValLoss / valSteps
        train_losses.append(avgTrainLoss.cpu().detach().numpy())
        val_losses.append(avgValLoss.cpu().detach().numpy())
        print("EPOCH: {}/{}".format(epoch, n_epochs))
        print("train loss: {:.5f}, val loss: {:.5f}".format(avgTrainLoss, avgValLoss))
        

In [79]:
trainx.shape

(7662, 3386)

In [80]:
model=NN(trainx.shape[1],32,32,16)

In [81]:
train_tx = torch.Tensor(trainx) 
train_ty = torch.Tensor(trainy)

train_t = TensorDataset(train_tx,train_ty) 

In [82]:
val_tx = torch.Tensor(valx) 
val_ty = torch.Tensor(valy)

val_t = TensorDataset(val_tx,val_ty)

In [83]:
train_model(train_t,val_t,model,100)

EPOCH: 1/100
train loss: 0.08644, val loss: 0.08629
EPOCH: 2/100
train loss: 0.06485, val loss: 0.06511
EPOCH: 3/100
train loss: 0.04866, val loss: 0.04892
EPOCH: 4/100
train loss: 0.03683, val loss: 0.03761
EPOCH: 5/100
train loss: 0.02872, val loss: 0.03032
EPOCH: 6/100
train loss: 0.02371, val loss: 0.02601
EPOCH: 7/100
train loss: 0.02060, val loss: 0.02361
EPOCH: 8/100
train loss: 0.01887, val loss: 0.02236
EPOCH: 9/100
train loss: 0.01806, val loss: 0.02163
EPOCH: 10/100
train loss: 0.01729, val loss: 0.02090
EPOCH: 11/100
train loss: 0.01635, val loss: 0.01971
EPOCH: 12/100
train loss: 0.01535, val loss: 0.01886
EPOCH: 13/100
train loss: 0.01465, val loss: 0.01798
EPOCH: 14/100
train loss: 0.01390, val loss: 0.01711
EPOCH: 15/100
train loss: 0.01327, val loss: 0.01637
EPOCH: 16/100
train loss: 0.01266, val loss: 0.01561
EPOCH: 17/100
train loss: 0.01215, val loss: 0.01495
EPOCH: 18/100
train loss: 0.01162, val loss: 0.01430
EPOCH: 19/100
train loss: 0.01108, val loss: 0.01380
EP

In [84]:
pred5=model(torch.Tensor(testx))
pred5=torch.flatten(pred5).cpu().detach().numpy()
val5=model(torch.Tensor(valx))
val5=torch.flatten(val5).cpu().detach().numpy()
scores(testy,pred5)

Pearson Score:  (0.7479384480526762, 4.918717544263409e-165)
R2_Score:  0.5481943640730688
Spearmanr Score:  SpearmanrResult(correlation=0.7078872285133001, pvalue=2.5711701019990157e-140)
Mean Squared Error(MSE):  0.007312508296516683
Mean Absolute Error(MAE):  0.06582451850249517


In [85]:
####Stacking###
from sklearn.linear_model import LinearRegression
stacked_model1=LinearRegression()
stacked_val=np.column_stack((val1,val2,val3,val4,val5))
stacked_model1.fit(stacked_val,valy)

stacked_pred1=np.column_stack((pred1,pred2,pred3,pred4,pred5))
pred6=stacked_model1.predict(stacked_pred1)
scores(testy,pred6)

Pearson Score:  (0.7564014391416677, 7.262229250206377e-171)
R2_Score:  0.5494580227486121
Spearmanr Score:  SpearmanrResult(correlation=0.720729587586981, pvalue=9.074690977354031e-148)
Mean Squared Error(MSE):  0.007292055885537086
Mean Absolute Error(MAE):  0.06563452473678758


In [98]:
####Stacking###
from sklearn.neighbors import KNeighborsRegressor
stacked_model2=KNeighborsRegressor(n_neighbors=32)
stacked_val=np.column_stack((val1,val2,val3,val4,val5))
stacked_model2.fit(stacked_val,valy)

stacked_pred2=np.column_stack((pred1,pred2,pred3,pred4,pred5))
pred7=stacked_model2.predict(stacked_pred2)
scores(testy,pred7)

Pearson Score:  (0.7551800811721078, 5.215086280377214e-170)
R2_Score:  0.5604477099750123
Spearmanr Score:  SpearmanrResult(correlation=0.7177314179300925, pvalue=5.430603822452624e-146)
Mean Squared Error(MSE):  0.007114186968841741
Mean Absolute Error(MAE):  0.06552319372653785


In [113]:
test_df=pd.read_csv('../input/lcp-test/lcp_single_test.tsv',encoding='utf-8', delimiter='\t', quotechar='\t', keep_default_na=False)
my_submission = pd.DataFrame({'id': test_df['id'], 'label': pred7})
my_submission.to_csv('lcp_task1.csv', header=False,index=False)