In [60]:
!pip install opendatasets



In [61]:
import opendatasets as od
od.download("https://www.kaggle.com/datasets/purushottamnawale/materials")


Skipping, found downloaded files in "./materials" (use force=True to force download)


In [62]:
import pandas as pd
data = pd.read_csv("/content/materials/material.csv")
data.head()

Unnamed: 0,Material,Su,Sy,E,G,mu,Ro,Use
0,ANSI Steel SAE 1015 as-rolled,421,314,207000,79000,0.3,7860,True
1,ANSI Steel SAE 1015 normalized,424,324,207000,79000,0.3,7860,True
2,ANSI Steel SAE 1015 annealed,386,284,207000,79000,0.3,7860,True
3,ANSI Steel SAE 1020 as-rolled,448,331,207000,79000,0.3,7860,True
4,ANSI Steel SAE 1020 normalized,441,346,207000,79000,0.3,7860,True


In [63]:
#checking if null value exist
data.isna().sum()

Unnamed: 0,0
Material,0
Su,0
Sy,0
E,0
G,0
mu,0
Ro,0
Use,0


In [64]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
columns = ["Material", "Use"]
for col in columns:
    data[col] = le.fit_transform(data[col])
data.head()

Unnamed: 0,Material,Su,Sy,E,G,mu,Ro,Use
0,441,421,314,207000,79000,0.3,7860,1
1,442,424,324,207000,79000,0.3,7860,1
2,440,386,284,207000,79000,0.3,7860,1
3,444,448,331,207000,79000,0.3,7860,1
4,445,441,346,207000,79000,0.3,7860,1


In [65]:
from sklearn.model_selection import train_test_split
y = data["Use"]
X = data.drop("Use", axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f'Training data has {len(X_train)} rows')
print(f'Test data has {len(X_test)} rows')

Training data has 1241 rows
Test data has 311 rows


**Visualizing X, Y train data**

In [66]:
X_train.head()

Unnamed: 0,Material,Su,Sy,E,G,mu,Ro
1080,1238,600,320,201000,78000,0.29,7856
1232,1028,390,215,200000,77000,0.3,7850
1108,1084,575,295,200000,77000,0.3,7850
1252,1087,540,250,206000,80000,0.29,7860
382,4,69,28,73000,26000,0.33,2700


In [67]:
y_train.head()

Unnamed: 0,Use
1080,0
1232,0
1108,0
1252,0
382,0


**Linear Regression**

In [68]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

classifier = LogisticRegression(random_state = 48, max_iter = 10000)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of Linear Regression is: {accuracy_score(y_test, predictions)}')

Accuracy of Linear Regression is: 0.887459807073955


**Support Vector Machine**

In [69]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

classifier = SVC(kernel = "rbf", random_state = 25)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of Support Vector Machine is: {accuracy_score(y_test, predictions)}')

Accuracy of Support Vector Machine is: 0.9163987138263665


**K-neighbors Classifier**

In [70]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of K-neighbors Classifier is: {accuracy_score(y_test, predictions)}')

Accuracy of K-neighbors Classifier is: 0.9871382636655949


**Decision Tree**

In [71]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

classifier = DecisionTreeClassifier(criterion = "gini", random_state = 27)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of Decision Tree is: {accuracy_score(y_test, predictions)}')

Accuracy of Decision Tree is: 0.9967845659163987


**Random Forest Classifier**

In [72]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

classifier = RandomForestClassifier(n_estimators = 10, criterion = "gini", random_state = 27)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of Random Forest Classifier is: {accuracy_score(y_test, predictions)}')

Accuracy of Random Forest Classifier is: 0.9967845659163987


**XG Boost**

In [73]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

classifier = XGBClassifier(eval_metric = "logloss")
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)

print(f'Accuracy of XG Boost is: {accuracy_score(y_test, predictions)}')

Accuracy of XG Boost is: 0.9935691318327974
