<a href="https://colab.research.google.com/github/mohammadreza76/carbon_nanotubes/blob/master/carbon_nanotubes_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR

## Importing the dataset

In [None]:
# The file is semicolon-delimited, so the separator is given explicitly.
# The coordinate columns use a decimal comma and are therefore loaded as
# strings; they are converted to floats in the preprocessing cells.
data = pd.read_csv("carbon_nanotubes.csv", sep=";")

In [None]:
data.head(5)

Unnamed: 0,Chiral indice n,Chiral indice m,Initial atomic coordinate u,Initial atomic coordinate v,Initial atomic coordinate w,Calculated atomic coordinates u',Calculated atomic coordinates v',Calculated atomic coordinates w'
0,2,1,679005,701318,17033,721039,730232,17014
1,2,1,717298,642129,231319,738414,65675,232369
2,2,1,489336,303751,88462,477676,263221,88712
3,2,1,413957,632996,40843,408823,657897,39796
4,2,1,334292,543401,15989,303349,558807,157373


## Preprocessing data

In [None]:
# Features: every column except the last three (chiral indices n, m and the
# initial u, v, w coordinates). Targets: the last three columns (calculated
# u', v', w' coordinates). Both slices are anchored to the same boundary
# (-3) so features and targets always partition the columns between them,
# instead of relying on two independent magic numbers (-3 and 5).
X = data.iloc[:, :-3].values
y = data.iloc[:, -3:].values

In [None]:
X

array([[2, 1, '0,679005', '0,701318', '0,017033'],
       [2, 1, '0,717298', '0,642129', '0,231319'],
       [2, 1, '0,489336', '0,303751', '0,088462'],
       ...,
       [12, 6, '0,923823', '0,568913', '0,819842'],
       [12, 6, '0,934978', '0,602319', '0,938889'],
       [12, 6, '0,953664', '0,698374', '0,962699']], dtype=object)

In [None]:
y

array([['0,721039', '0,730232', '0,017014'],
       ['0,738414', '0,65675', '0,232369'],
       ['0,477676', '0,263221', '0,088712'],
       ...,
       ['0,929403', '0,576284', '0,819879'],
       ['0,941844', '0,610608', '0,938755'],
       ['0,961243', '0,707812', '0,962605']], dtype=object)

Replace the decimal comma (`,`) with a decimal point (`.`) in the numeric strings so that they can be converted from string to float in the next step.

In [None]:
def replacing_string(array):
  """Replace decimal commas with decimal points in every string cell.

  Walks a 2-D, row-indexable container (NumPy array or list of lists) and,
  for each element that is a string, replaces every ',' with '.'.
  Non-string elements (e.g. integers) are left untouched.

  Args:
    array: 2-D mutable container whose rows support item assignment.

  Returns:
    The same `array` object; it is modified in place and returned for
    convenience so the call can be used in an assignment.
  """
  for row in array:
    for j, cell in enumerate(row):
      # isinstance (rather than `type(cell) == str`) also matches str
      # subclasses such as numpy.str_, which is what fixed-width NumPy
      # string arrays yield when indexed.
      if isinstance(cell, str):
        row[j] = cell.replace(',', '.')
  return array

In [None]:
X = replacing_string(X)

In [None]:
y = replacing_string(y)

In [None]:
X

array([[2, 1, '0.679005', '0.701318', '0.017033'],
       [2, 1, '0.717298', '0.642129', '0.231319'],
       [2, 1, '0.489336', '0.303751', '0.088462'],
       ...,
       [12, 6, '0.923823', '0.568913', '0.819842'],
       [12, 6, '0.934978', '0.602319', '0.938889'],
       [12, 6, '0.953664', '0.698374', '0.962699']], dtype=object)

In [None]:
y

array([['0.721039', '0.730232', '0.017014'],
       ['0.738414', '0.65675', '0.232369'],
       ['0.477676', '0.263221', '0.088712'],
       ...,
       ['0.929403', '0.576284', '0.819879'],
       ['0.941844', '0.610608', '0.938755'],
       ['0.961243', '0.707812', '0.962605']], dtype=object)

Convert the numeric strings to floating-point numbers.

In [None]:
# Cast the cleaned object arrays to floating point. The `np.float` alias was
# deprecated in NumPy 1.20 and removed in 1.24 (AttributeError); the builtin
# `float` it aliased produces the same float64 arrays on every NumPy version.
X = X.astype(float)
y = y.astype(float)

In [None]:
X

array([[ 2.      ,  1.      ,  0.679005,  0.701318,  0.017033],
       [ 2.      ,  1.      ,  0.717298,  0.642129,  0.231319],
       [ 2.      ,  1.      ,  0.489336,  0.303751,  0.088462],
       ...,
       [12.      ,  6.      ,  0.923823,  0.568913,  0.819842],
       [12.      ,  6.      ,  0.934978,  0.602319,  0.938889],
       [12.      ,  6.      ,  0.953664,  0.698374,  0.962699]])

In [None]:
y

array([[0.721039, 0.730232, 0.017014],
       [0.738414, 0.65675 , 0.232369],
       [0.477676, 0.263221, 0.088712],
       ...,
       [0.929403, 0.576284, 0.819879],
       [0.941844, 0.610608, 0.938755],
       [0.961243, 0.707812, 0.962605]])

## Splitting the data into training and test sets

In [None]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

## Feature Scaling

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both splits so the test set does not influence scaling.
standard_scaler = StandardScaler().fit(X_train)
X_train = standard_scaler.transform(X_train)
X_test = standard_scaler.transform(X_test)

In [None]:
X_train

array([[ 0.82930634,  0.39311177, -0.10028201,  1.24449248,  1.69464753],
       [-0.57440376,  1.57864015, -1.01675502, -1.42385089, -1.64543491],
       [ 1.2972097 , -0.79241661,  0.91116498,  1.46709755, -1.49052982],
       ...,
       [-0.10650039,  0.98587596,  1.16779698,  1.33567321,  1.10895409],
       [-1.51021049, -0.19965242, -0.07206538, -1.11793188,  0.19335399],
       [ 0.36140297,  0.39311177, -0.27266576,  1.11335048,  1.31873312]])

In [None]:
X_test

array([[ 1.76511307, -0.19965242, -0.67156841, -1.53611891,  1.23636494],
       [-0.57440376,  1.57864015,  0.57989634,  1.44442784,  1.07436064],
       [-1.97811385, -0.19965242, -1.12997779, -0.23587335, -1.73023661],
       ...,
       [ 0.36140297,  0.39311177, -1.47654049, -0.65218169,  0.34427728],
       [-0.10650039,  0.98587596,  1.13324479, -0.2289616 , -1.52830664],
       [ 1.76511307, -0.79241661, -0.29048496,  1.14521842,  1.16139798]])

## Training the Linear regression model on the training set

In [None]:
# Fit a linear regression on the scaled training features. y_train has three
# target columns, so all three coordinates are fitted in this single call.
carbon_regression_model = LinearRegression().fit(X_train, y_train)
carbon_regression_model  # last expression: display the fitted estimator

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the test set results with the linear regression model

In [None]:
y_predict = carbon_regression_model.predict(X_test)

In [None]:
r2_score(y_test,y_predict)

0.9992380186075627

Print the model coefficients and intercepts.

In [None]:
# coef_ holds one row per target and one column per feature; intercept_ holds
# one value per target.
print(f"linear model coefficients : {carbon_regression_model.coef_}")
print(f"linear model intercept : {carbon_regression_model.intercept_}")

linear model coefficients : [[-1.97330523e-05 -3.47134407e-05  2.90335631e-01  6.55927520e-04
   1.84077380e-04]
 [-1.31621303e-05 -2.89946768e-05  2.46025632e-04  2.91233264e-01
   2.62000351e-04]
 [ 2.13757539e-04 -2.19541037e-04 -3.61508448e-04  2.77183882e-04
   2.88368130e-01]]
linear model intercept : [0.50257477 0.50172331 0.50042997]


## Predicting on training set

In [None]:
y_train_predict = carbon_regression_model.predict(X_train)

## Showing the results

In [None]:
# Report R2 on both splits side by side so a train/test gap is easy to spot.
print(f"The R2 score on the Train set is:\t{r2_score(y_train, y_train_predict):0.3f}")
print(f"The R2 score on the Test set is:\t{r2_score(y_test, y_predict):0.3f}")

The R2 score on the Train set is:	0.999
The R2 score on the Test set is:	0.999


## Applying k-fold cross-validation to the SVR model

In [None]:
carbon_svr_model = SVR()


In [None]:
# 10-fold cross-validation of the SVR on the second target column (index 1,
# the calculated v' coordinate); SVR fits a single target at a time.
# NOTE: no `scoring` argument is passed, so cross_val_score falls back to the
# estimator's own score method (R^2 for regressors). The "accuracy" printed
# here is therefore the mean R^2 over the folds, expressed as a percentage.
accuracies = cross_val_score(
    estimator=carbon_svr_model,
    X=X_train,
    y=y_train[:, 1],
    cv=10,
)
print(f"accuracy: {accuracies.mean() * 100:.2f}")
print(f"standard Deviation: {accuracies.std() * 100:.2f}")

accuracy: 95.18
standard Deviation: 0.16


In [None]:
# 10-fold cross-validation of the SVR on the first target column (index 0,
# the calculated u' coordinate); SVR fits a single target at a time.
# NOTE: no `scoring` argument is passed, so cross_val_score falls back to the
# estimator's own score method (R^2 for regressors). The "accuracy" printed
# here is therefore the mean R^2 over the folds, expressed as a percentage.
accuracies = cross_val_score(
    estimator=carbon_svr_model,
    X=X_train,
    y=y_train[:, 0],
    cv=10,
)
print(f"accuracy: {accuracies.mean() * 100:.2f}")
print(f"standard Deviation: {accuracies.std() * 100:.2f}")

accuracy: 95.37
standard Deviation: 0.06


In [None]:
# 10-fold cross-validation of the SVR on the third target column (index 2,
# the calculated w' coordinate); SVR fits a single target at a time.
# NOTE: no `scoring` argument is passed, so cross_val_score falls back to the
# estimator's own score method (R^2 for regressors). The "accuracy" printed
# here is therefore the mean R^2 over the folds, expressed as a percentage.
accuracies = cross_val_score(
    estimator=carbon_svr_model,
    X=X_train,
    y=y_train[:, 2],
    cv=10,
)
print(f"accuracy: {accuracies.mean() * 100:.2f}")
print(f"standard Deviation: {accuracies.std() * 100:.2f}")

accuracy: 97.15
standard Deviation: 0.25
