<a href="https://colab.research.google.com/github/9characters/ML-Training/blob/main/ML_Workshop_Day1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PYTHON DATA STRUCTURES

In [None]:
# Lists: ordered, mutable, and free to mix value types
listA = [
    12,
    24,
    "Black",
    "Box",
]

In [None]:
# Index access is zero-based: show the first and the fourth element, one per line
print(listA[0], listA[3], sep="\n")

In [None]:
# Walk the list in order, printing every element on its own line
for element in listA:
  print(element)

In [None]:
# Dictionaries: key -> value mapping; keys may be of different hashable types
dictA = {
    "one": 1,
    "name": "Harry",
    23: "roll",
}

In [None]:
# Look up each value by its key (two string keys and one integer key)
for lookup_key in ("one", "name", 23):
  print(dictA[lookup_key])

In [None]:
# .items() yields (key, value) pairs; only the key is printed here,
# so the value is bound to an underscore-prefixed name to mark it as unused
for key, _value in dictA.items():
  print(key)

In [None]:
# Tuples: ordered like lists, but immutable once created
tupleA = (
    12,
    24,
    "one",
    "two",
)

In [None]:
# Tuples support the same zero-based indexing and slicing as lists
third, fourth = tupleA[2:4]
print(third)
print(fourth)

In [None]:
# Iterating a tuple works exactly like iterating a list
for member in tupleA:
  print(member)

# PANDAS

In [None]:
import pandas as pd

# Column-oriented input: each key becomes a column, each list holds that column's rows
data = {
    "empId": [100, 101, 102, 103],
    "Name": ["John", "Harry", "Peter", "Julie"],
    "age": [24, 27, 28, 23],
    "salary": [45000, 60000, 65000, 35000],
}

# Build the frame; rows get the default 0..n-1 integer index
df = pd.DataFrame(data)
print(df)

In [None]:
# Use the empId column as the row index (it stops being a regular column)
df = df.set_index(keys="empId")

In [None]:
# Show the frame as plain text to inspect its current index and columns
print(df)

In [None]:
# Go back to the default integer index; the old index is kept as a column
df = df.reset_index(drop=False)

In [None]:
# Accessing a single column: every row of "Name", returned as a Series
print(df.loc[:, "Name"])

In [None]:
# Accessing rows with label-based indexing (.loc); both slice ends are inclusive.
# General form: df.loc[startrow:endrow, startcolumn:endcolumn]
rows_1_to_3 = df.loc[1:3, "Name":"salary"]
print(rows_1_to_3)
print("\n")
# A single row label returns that row as a Series
print(df.loc[2])

In [None]:
# Get the age of Julie: one cell, addressed by row label 3 and column "age"
print(df.at[3, "age"])

In [None]:
# Rows labelled 0 through 2 (inclusive), all columns
df.loc[0:2]

In [None]:
# From the first row up to label 2 (inclusive), columns "Name" through "salary"
df.loc[:2, "Name":"salary"]

In [None]:
# Position-based indexing (.iloc): integer positions, stop is exclusive.
# A stop past the last row is clipped to the end of the frame instead of raising.
df.iloc[2:5, 1:3]

# LINEAR REGRESSION

In [None]:
!git clone https://github.com/9characters/ML-Training

In [None]:
cd ML-Training/Day 1

In [None]:
# Load the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# Load the regression dataset from the current working directory
dataset = pd.read_csv(filepath_or_buffer="headbrain.csv")

In [None]:
# Preview the first five rows
dataset.head(n=5)

In [None]:
# List the column labels of the loaded dataset
dataset.columns

In [None]:
# Separating the independent (X) and dependent (Y) variables.
# Drop incomplete rows ONCE across both columns: dropping NaNs per column
# would remove different rows from X and Y and break their row alignment.
# NOTE(review): assumes column 0 is the feature and column 1 the target — confirm against headbrain.csv
paired = dataset.iloc[:, 0:2].dropna()
X = paired.iloc[:, 0:1].values  # 2-D array, one feature column
Y = paired.iloc[:, 1:2].values  # 2-D array, one target column

In [None]:
# Peek at the first five feature rows, then the first five target rows
print(X[:5])
print(f"\n {Y[:5]}")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=5,
)

In [None]:
# First five training features, followed by their matching targets
print(X_train[:5])
print(f"\n{Y_train[:5]}")

In [None]:
# Importing Linear Regression module from the sklearn library
from sklearn.linear_model import LinearRegression

In [None]:
# Defining the Linear Regression model with its default settings (not fitted yet)
lrModel = LinearRegression()

In [None]:
# Fit on the training split only; fit() returns the estimator itself
lrModel = lrModel.fit(X=X_train, y=Y_train)

In [None]:
# Predict targets for the held-out test features
Y_pred = lrModel.predict(X=X_test)

In [None]:
# R-squared on the test split, rounded to three decimals for display
r2_raw = r2_score(y_true=Y_test, y_pred=Y_pred)
resultLR = round(r2_raw, 3)

In [None]:
# Report the rounded test-set R-squared computed above
print(f"R2 Score using Linear Regression: {resultLR}")

In [None]:
# Predict for one new observation; the input must be 2-D (one row, one feature)
# NOTE(review): 3329 is presumably a head size in cm^3, going by the plot labels — confirm
sample = [[3329]]
lrModel.predict(sample)

In [None]:
# Test points in red, the fitted line (predictions on the same inputs) in blue
fig, ax = plt.subplots()
ax.scatter(X_test, Y_test, color="red")
ax.plot(X_test, Y_pred, color="blue")
ax.set_xlabel("Head Size(cm^3)")
ax.set_ylabel("Brain Weight(grams)")
plt.show()

# ARTIFICIAL NEURAL NETWORK

In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Load the classification dataset from the current working directory
irisData = pd.read_csv(filepath_or_buffer="Iris.csv")

In [None]:
# Maps a predicted class index to its species name.
# NOTE(review): presumably mirrors the class order produced by the label
# binarizer used later in the notebook — confirm against lb.classes_
dataLabel = {
    0: "Iris-setosa",
    1: "Iris-versicolor",
    2: "Iris-virginica",  # spelling fixed (was "Iris-verginica")
}

In [None]:
# Preview the first five rows
irisData.head(n=5)

In [None]:
# Separate the features (X) from the label column (Y) by position.
# NOTE(review): assumes columns 0-3 are the four measurements and column 4 is
# the species label — confirm that Iris.csv has no leading Id column
X = irisData.iloc[:, :4]
Y = irisData.iloc[:, 4:5]  # slice (not a scalar index) keeps Y two-dimensional

In [None]:
# Learn the set of class labels, then replace Y with its one-hot encoding.
# Note: Y is overwritten, so this cell should run once per kernel session.
lb = LabelBinarizer().fit(Y)
Y = lb.transform(Y)

In [None]:
# Show the first five encoded label rows
print(Y[:5])

In [None]:
# Hold out 10% of the rows for validation; the fixed seed makes the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=105,
)

In [None]:
# Row counts of the feature splits: training first, then test
for feature_split in (X_train, X_test):
  print(len(feature_split))

In [None]:
# Row counts of the label splits: training first, then test
for label_split in (Y_train, Y_test):
  print(len(label_split))

In [None]:
# Feed-forward network: 4 inputs -> 6 sigmoid -> 6 sigmoid -> 3 softmax outputs
model = Sequential([
    Dense(input_dim=4, units=6, activation='sigmoid'),
    Dense(units=6, activation='sigmoid'),
    Dense(units=3, activation="softmax"),  # one probability per class
])

In [None]:
# Categorical cross-entropy pairs with the softmax output and one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

In [None]:
# Train on the training split only. Fitting on the full X/Y would include the
# rows in X_test/Y_test, so the validation metrics would be measured on data
# the model has already seen (data leakage).
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    batch_size=5,
    verbose=1,
    epochs=100,
)

In [None]:
def roundOutput(value):
  """Return `value` rounded to five decimal places using NumPy rounding."""
  return np.around(value, decimals=5)

In [None]:
# One sample with four feature values, given as a (1, 4) array.
# NOTE(review): nested Python lists are reportedly rejected by newer Keras
# releases; an ndarray is accepted regardless — confirm for the installed version
sample = np.array([[5.4, 3.9, 1.7, 0.4]])
output = model.predict(sample)
print(f"Output from Softmax: {list(map(roundOutput, output[0]))}")
# Take the argmax over this sample's row only, and convert to a plain int key
category = int(np.argmax(output[0]))
print(f"Category: {dataLabel[category]}")

In [None]:
# Per-epoch curves recorded by fit(): training/validation accuracy, then loss
trainAcc, valAcc, trainLoss, valLoss = (
    history.history[metric]
    for metric in ("accuracy", "val_accuracy", "loss", "val_loss")
)

In [None]:
# Derive the x-axis from the recorded history instead of hard-coding 100, so
# the plot keeps working if the number of training epochs is changed.
epochs = np.arange(len(trainAcc))
plt.plot(epochs, trainAcc, 'g', label = 'TrainACC')
plt.plot(epochs, valAcc, 'b', label = 'ValAcc')
plt.title('Training vs Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Derive the x-axis from the recorded history instead of hard-coding 100, so
# the plot keeps working if the number of training epochs is changed.
epochs = np.arange(len(trainLoss))
plt.plot(epochs, trainLoss, 'g', label = 'TrainLoss')
plt.plot(epochs, valLoss, 'b', label = 'ValLoss')
plt.title('Training vs Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()