## 1. Take one of the supervised learning models you have built recently and apply at least three dimensionality reduction techniques to it (separately). Be sure to create a short summary of each technique you use. Indicate how each changed the model performance. https://machinelearningmastery.com/dimensionality-reduction-algorithms-with-python/

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.metrics import confusion_matrix, classification_report, plot_confusion_matrix
import pydotplus
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

In [3]:
credit_df = pd.read_table("australian.dat", sep=" ",header=None)

In [6]:
#preprocessing
scaler = StandardScaler()
normalized_df = credit_df.drop([11], axis=1)
df_scaled = pd.DataFrame(scaler.fit_transform(normalized_df), columns = normalized_df.columns)
scaled = df_scaled.astype("int64")

In [20]:
#original model
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
rf = RandomForestClassifier(n_estimators=200, max_depth =10, random_state =50,min_samples_leaf=7,
                            min_weight_fraction_leaf=0.2)


X = scaled.drop(14, axis=1)
y = scaled[14]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)


rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.8840579710144928

In [21]:
predictions = rf.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.93      0.89      0.91        87
           1       0.82      0.88      0.85        51

    accuracy                           0.88       138
   macro avg       0.87      0.88      0.88       138
weighted avg       0.89      0.88      0.88       138



In [22]:
#method 1: Singular Value Decomposition/SVD
#This technique works really well for dimensionality reduction for sparse data.
#Using this technique decreased the model performance
from sklearn.decomposition import TruncatedSVD

svd = TruncatedSVD(n_components=7)

X_train_svd=svd.fit_transform(X_train)
X_test_svd=svd.fit_transform(X_test)

model = rf.fit(X_train_svd, y_train)
print(model.score(X_train_svd,y_train))

model = rf.fit(X_test_svd, y_test)
print(model.score(X_test_svd,y_test))

0.8297101449275363
0.8405797101449275


In [57]:
#method 2: Isomap Embedding/Isomap, 
#This technique creates an embedding of the dataset and attempts to preserve the relationships in the dataset.
#Using this technique decreased the model performance but playing with the n components can increasae the model's accuracy
from sklearn.manifold import Isomap
iso=Isomap(n_components = 7)

X_train_iso=iso.fit_transform(X_train)
X_test_iso=iso.fit_transform(X_test)

model = rf.fit(X_train_iso, y_train)
print(model.score(X_train_iso,y_train))

model = rf.fit(X_test_iso, y_test)
print(model.score(X_test_iso,y_test))

0.8097826086956522
0.8405797101449275


In [48]:
#method 3: Locally Linear Embedding/LLE
#This technique creates an embedding of the dataset and attempts to preserve the relationships between neighborhoods in the dataset. 
#Using this technique decreased the model performance
from sklearn.manifold import LocallyLinearEmbedding
lle=LocallyLinearEmbedding(random_state=47)

X_train_lle=lle.fit_transform(X_train)
X_test_lle=lle.fit_transform(X_test)

model = rf.fit(X_train_lle, y_train)
print(model.score(X_train_lle,y_train))

model = rf.fit(X_test_lle, y_test)
print(model.score(X_test_lle,y_test))

0.7373188405797102
0.7608695652173914


## 2. Write a function that will indicate if an inputted IPv4 address is accurate or not. IP addresses are valid if they have 4 values between 0 and 255 (inclusive), punctuated by periods.

In [74]:
import re
regex = "^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$"

def accurate(address):
    if(re.search(regex, address)):
        return True
    else:
        return False

In [76]:
ip = '12.345.67.89'
accurate(ip)

False

In [77]:
ip = '2.33.245.5'
accurate(ip)

True