In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

In [13]:
# Column labels applied to the CSV, in file order: an identifier, nine
# measured features, and the class label.
column = [
    'ID',
    'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe',
    'Type of Glass',
]

# header=None + names=column: pandas does not parse a header line; our labels
# are used instead. skiprows=1 discards the first physical line of the file.
# NOTE(review): this presumes the first line is a header row - confirm against
# the CSV, otherwise one data sample is silently dropped here.
df = pd.read_csv(
    "Glass Identification.csv",
    names=column,
    header=None,
    skiprows=1,
)

In [14]:
# Remove the 'ID' column (presumably just a row identifier - confirm) so it is
# not treated as a feature downstream.
# errors='ignore' makes the cell idempotent: because the result is assigned
# back to `df`, a second run would otherwise raise KeyError ('ID' already gone).
df = df.drop(columns='ID', errors='ignore')
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type of Glass
0,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
1,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
2,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
3,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1
4,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1


In [15]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   RI             213 non-null    float64
 1   Na             213 non-null    float64
 2   Mg             213 non-null    float64
 3   Al             213 non-null    float64
 4   Si             213 non-null    float64
 5   K              213 non-null    float64
 6   Ca             213 non-null    float64
 7   Ba             213 non-null    float64
 8   Fe             213 non-null    float64
 9   Type of Glass  213 non-null    int64  
dtypes: float64(9), int64(1)
memory usage: 16.8 KB


In [16]:
# Count missing entries per column (isna is the same check as isnull).
df.isna().sum()

RI               0
Na               0
Mg               0
Al               0
Si               0
K                0
Ca               0
Ba               0
Fe               0
Type of Glass    0
dtype: int64

In [17]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each
# numeric column of the frame.
df.describe()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type of Glass
count,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0
mean,1.518353,13.406761,2.676056,1.446526,72.655023,0.499108,8.957934,0.175869,0.057277,2.788732
std,0.003039,0.818371,1.440453,0.499882,0.774052,0.653035,1.426435,0.498245,0.097589,2.10513
min,1.51115,10.73,0.0,0.29,69.81,0.0,5.43,0.0,0.0,1.0
25%,1.51652,12.9,2.09,1.19,72.28,0.13,8.24,0.0,0.0,1.0
50%,1.51768,13.3,3.48,1.36,72.79,0.56,8.6,0.0,0.0,2.0
75%,1.51915,13.83,3.6,1.63,73.09,0.61,9.18,0.0,0.1,3.0
max,1.53393,17.38,3.98,3.5,75.41,6.21,16.19,3.15,0.51,7.0


In [18]:
# Pairwise correlation between all columns, including the class label.
# 'pearson' is the pandas default, spelled out here for the reader.
corr_matrix = df.corr(method='pearson')
corr_matrix

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type of Glass
RI,1.0,-0.193436,-0.128118,-0.405671,-0.54001,-0.2879,0.812495,0.001062,0.145791,-0.161322
Na,-0.193436,1.0,-0.276486,0.157928,-0.068519,-0.26552,-0.275314,0.327233,-0.240802,0.504983
Mg,-0.128118,-0.276486,1.0,-0.480035,-0.160359,0.009397,-0.444559,-0.492149,0.086906,-0.744004
Al,-0.405671,0.157928,-0.480035,1.0,-0.009226,0.324484,-0.260372,0.478936,-0.076456,0.597754
Si,-0.54001,-0.068519,-0.160359,-0.009226,1.0,-0.197684,-0.210141,-0.104361,-0.097674,0.147767
K,-0.2879,-0.26552,0.009397,0.324484,-0.197684,1.0,-0.318649,-0.04379,-0.009586,-0.012765
Ca,0.812495,-0.275314,-0.444559,-0.260372,-0.210141,-0.318649,1.0,-0.113121,0.124674,0.000372
Ba,0.001062,0.327233,-0.492149,0.478936,-0.104361,-0.04379,-0.113121,1.0,-0.059729,0.574896
Fe,0.145791,-0.240802,0.086906,-0.076456,-0.097674,-0.009586,0.124674,-0.059729,1.0,-0.19109
Type of Glass,-0.161322,0.504983,-0.744004,0.597754,0.147767,-0.012765,0.000372,0.574896,-0.19109,1.0


In [19]:
# Separate the predictors from the class label.
X = df.drop(columns='Type of Glass')
y = df['Type of Glass']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std used for preprocessing (data leakage).
# The partition depends only on the number of rows and random_state, so the
# same rows land in train/test as when the split was done on the scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell referring to X_scaled still works; it is now the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Fixed random_state keeps the forest reproducible across runs.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [20]:
# Predict labels for the held-out rows and report the fraction classified
# correctly.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy=", accuracy)

Accuracy= 0.7441860465116279


In [21]:
# Classify one hand-entered sample. The feature order must match the columns
# the scaler and model were fitted on.
feature_names = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']
values = [[
    1.518353,   # RI
    13.40676,   # Na
    2.676056,   # Mg
    1.446526,   # Al
    72.655023,  # Si
    0.499108,   # K
    8.957934,   # Ca
    0.175869,   # Ba
    0.057277,   # Fe
]]
df1 = pd.DataFrame(data=values, columns=feature_names)

# Reuse the already-fitted scaler (transform only, no refit) so the sample is
# expressed on the same scale as the training data.
df1_scaled = scaler.transform(df1)
type_of_glass = rf_model.predict(df1_scaled)
print("Predicted Type of Glass:", type_of_glass[0])

Predicted Type of Glass: 2
