In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [49]:
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold

### Preprocessing for Abalone:

In [12]:
abalone_df = pd.read_table("/content/drive/My Drive/Abalone/abalone.data", sep=",",header=None)
abalone_df.columns=['sex','length','diameter','height','whole_wt','shucked_wt','viscera_wt','shell_wt','rings']
abalone_df

Unnamed: 0,sex,length,diameter,height,whole_wt,shucked_wt,viscera_wt,shell_wt,rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [13]:
# Converting categorical variable to numerical:

le=LabelEncoder()

abalone_df['sex_c']=le.fit_transform(abalone_df['sex'])
abalone_df

# Drop sex variable after encoding:
abalone_df.drop('sex',axis=1,inplace=True)

In [14]:
# Some outliers has been identified hence removing outliers using zscore greater than 3.

from scipy import stats
z = np.abs(stats.zscore(abalone_df))  
threshold = 3
aba_df = abalone_df[(z < 3).all(axis=1)]    
aba_df

Unnamed: 0,length,diameter,height,whole_wt,shucked_wt,viscera_wt,shell_wt,rings,sex_c
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,2
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,2
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,2
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,1
...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,2
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,2
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,0


In [15]:
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [16]:
ab_list=aba_df.index
aba_df['bin_rings']=''

for i in ab_list:
  if aba_df['rings'][i] <= 8:
    aba_df['bin_rings'].loc[i]= 0 
  elif 8 < aba_df['rings'][i] <=15:
    aba_df['bin_rings'].loc[i] =1
  else:
    aba_df['bin_rings'].loc[i] =2

aba_df['bin_rings']=aba_df['bin_rings'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

1.Take one of the supervised learning models you have built recently and apply at least
three dimensionality reduction techniques to it (separately). Be sure to create a short
summary of each technique you use. Indicate how each changed the model
performance. Reference:
https://machinelearningmastery.com/dimensionality-reduction-algorithms-with-python/


With out Dimentionality reduction:

In [37]:
import tensorflow
import keras

X = aba_df.drop(['bin_rings','rings'], axis=1)
y = aba_df['bin_rings']
print("X Shape", X.shape)
print("x Shape", y.shape)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=42)

# Standardization:
sc=StandardScaler()
sc.fit_transform(X_train)

# Initialize Sequential model
model = tensorflow.keras.models.Sequential()
# Normalize input data
model.add(tensorflow.keras.layers.BatchNormalization(input_shape=(8,)))
model.add(tensorflow.keras.layers.Dense(3,activation='softmax'))
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), # default from_logits=False
              metrics=[keras.metrics.SparseCategoricalAccuracy()])
model.fit(X_train,y_train,batch_size=10,verbose=0)
y_pred = model.predict(X_test)
y_pred1 = []
for y in y_pred:
    y_pred1.append(np.argmax(y))
#confusion_matrix(y_test,y_pred1)
print(classification_report(y_test,y_pred1))

X Shape (4027, 8)
x Shape (4027,)


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


              precision    recall  f1-score   support

           0       0.68      0.16      0.26       315
           1       0.66      0.97      0.79       642
           2       0.00      0.00      0.00        50

    accuracy                           0.67      1007
   macro avg       0.45      0.37      0.35      1007
weighted avg       0.64      0.67      0.58      1007



  _warn_prf(average, modifier, msg_start, len(result))


Truncated SVD:

In [60]:
from sklearn.decomposition import TruncatedSVD


X = aba_df.drop(['bin_rings','rings'], axis=1)
y = aba_df['bin_rings']
print("X Shape", X.shape)
print("x Shape", y.shape)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=42)

# Standardization:
sc=StandardScaler()
sc.fit_transform(X_train)

svd = TruncatedSVD(n_components=3)
X_train_svd=svd.fit_transform(X_train)
X_test_svd=svd.fit_transform(X_test)


# Initialize Sequential model
model = tensorflow.keras.models.Sequential()
# Normalize input data
model.add(tensorflow.keras.layers.BatchNormalization(input_shape=(3,)))
model.add(tensorflow.keras.layers.Dense(3,activation='softmax'))
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), # default from_logits=False
              metrics=[keras.metrics.SparseCategoricalAccuracy()])
model.fit(X_train_svd,y_train,batch_size=10,verbose=0)
y_pred = model.predict(X_test_svd)
y_pred1 = []
for y in y_pred:
    y_pred1.append(np.argmax(y))
#confusion_matrix(y_test,y_pred1)
print(classification_report(y_test,y_pred1))

X Shape (4027, 8)
x Shape (4027,)


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


              precision    recall  f1-score   support

           0       0.64      0.77      0.70       315
           1       0.81      0.79      0.80       642
           2       0.00      0.00      0.00        50

    accuracy                           0.74      1007
   macro avg       0.48      0.52      0.50      1007
weighted avg       0.72      0.74      0.73      1007



  _warn_prf(average, modifier, msg_start, len(result))


### PCA

In [63]:
from sklearn.decomposition import PCA

X = aba_df.drop(['bin_rings','rings'], axis=1)
y = aba_df['bin_rings']
print("X Shape", X.shape)
print("y Shape", y.shape)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=42)

# Standardization:
sc=StandardScaler()
sc.fit_transform(X_train)

pca = PCA(n_components=4)
X_train_pca=pca.fit_transform(X_train)
X_test_pca=pca.fit_transform(X_test)

# Initialize Sequential model
model = tensorflow.keras.models.Sequential()
# Normalize input data
model.add(tensorflow.keras.layers.BatchNormalization(input_shape=(4,)))
model.add(tensorflow.keras.layers.Dense(3,activation='softmax'))
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), # default from_logits=False
              metrics=[keras.metrics.SparseCategoricalAccuracy()])
model.fit(X_train_pca,y_train,batch_size=10,verbose=0)
y_pred = model.predict(X_test_pca)
y_pred1 = []
for y in y_pred:
    y_pred1.append(np.argmax(y))
#confusion_matrix(y_test,y_pred1)
print(classification_report(y_test,y_pred1))

X Shape (4027, 8)
y Shape (4027,)


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


              precision    recall  f1-score   support

           0       0.66      0.73      0.70       315
           1       0.80      0.82      0.81       642
           2       0.00      0.00      0.00        50

    accuracy                           0.75      1007
   macro avg       0.49      0.52      0.50      1007
weighted avg       0.72      0.75      0.74      1007



  _warn_prf(average, modifier, msg_start, len(result))


Linear Discriminant Analysis:

In [115]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

X = aba_df.drop(['bin_rings','rings'], axis=1)
y = aba_df['bin_rings']
print("X Shape", X.shape)
print("y Shape", y.shape)

# Standardization:
sc=StandardScaler()
sc.fit_transform(X)

#applying LDA:
lda = LDA()
X_lda = lda.fit_transform(X,y)

X_train,X_test,y_train,y_test=train_test_split(X_lda,y,test_size=0.25,random_state=42)

# Initialize Sequential model
model = tensorflow.keras.models.Sequential()
# Normalize input data
model.add(tensorflow.keras.layers.BatchNormalization(input_shape=(2,)))
model.add(tensorflow.keras.layers.Dense(3,activation='softmax'))
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), # default from_logits=False
              metrics=[keras.metrics.SparseCategoricalAccuracy()])
model.fit(X_train,y_train,batch_size=10,verbose=0)
y_pred = model.predict(X_test)
y_pred1 = []
for y in y_pred:
    y_pred1.append(np.argmax(y))
#confusion_matrix(y_test,y_pred1)
print(classification_report(y_test,y_pred1))


X Shape (4027, 8)
y Shape (4027,)
X Shape (4027, 8)
y Shape (4027,)


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


              precision    recall  f1-score   support

           0       0.65      0.77      0.70       315
           1       0.81      0.80      0.80       642
           2       0.00      0.00      0.00        50

    accuracy                           0.75      1007
   macro avg       0.48      0.52      0.50      1007
weighted avg       0.72      0.75      0.73      1007



  _warn_prf(average, modifier, msg_start, len(result))


2. Write a function that will indicate if an inputted IPv4 address is accurate or not.
IP addresses are valid if they have 4 values between 0 and 255 (inclusive), punctuated
by periods.
Input 1:
2.33.245.5
Output 1:
True
Input 2: 
12.345.67.89
Output 2:
False

In [116]:
def func():
  ip=input("Enter IPV4: ")
  ip=str(ip)
  sum=0
  if '.' in ip:
    x=ip.split(".")
    print(x)
    for i in x:
      if int(i) < 256:
        sum=sum+1
    if sum == 4:
        print ("Valid IPV4")
    else:
        print ("Not a valid IPV4")

  else:
    print("Invalid Format")

In [117]:
func()

Enter IPV4: 2.33.245.5
['2', '33', '245', '5']
Valid IPV4


In [118]:
func()

Enter IPV4: 12.345.67.89
['12', '345', '67', '89']
Not a valid IPV4
