# **Basics of popular Python libraries for AI**

In [None]:
# Data processing using Pandas...


import numpy as np
import pandas as pd

# Loading data from CSV file ("/" placeholders are parsed as NaN values)...
data = pd.read_csv("/report.csv", na_values = "/")

# Reading first 10 rows...
print(data.head(10))

# Reading last 10 rows...
print(data.tail(10))

# Reading specific columns of DataFrame object...
print(data["Ispit1"])
print(data["Ispit2"])

# Reading rows with specific characteristic (here: "Prisustvo" equal to 0)...
print(data.loc[data["Prisustvo"] == 0])

# Replacing values (this is only for informative purpose, these values are already
# replaced by the "na_values" argument of read_csv above)...
data.replace("/", np.nan, inplace = True)

# Reading specific columns of rows with specific characteristic; one .loc call
# selects the rows and the columns together instead of chaining two lookups...
print(data.loc[data["Ocjena"] > 7, ["Indeks", "UKUPNO", "Ocjena"]])

# Deleting specific rows of DataFrame object (rows where "Ocjena" is NaN)...
data.dropna(subset = ["Ocjena"], inplace = True)

# Adding new columns holding the larger value of each column pair...
# A row-wise max skips NaN by default, so a missing value in one column is ignored
# and the result is NaN only when both values are missing. This avoids a sentinel
# value (such as -1) that could collide with real data.
data["Ispit1_final"] = data[["Ispit1", "Ispit1_popravni"]].max(axis = 1)
data["Ispit2_final"] = data[["Ispit2", "Ispit2_popravni"]].max(axis = 1)

# Deleting specific columns of DataFrame object (now summarised by the "*_final" columns)...
data.drop(columns = ["Ispit1", "Ispit2", "Ispit1_popravni", "Ispit2_popravni"], inplace = True)

# Saving DataFrame object as CSV file (semicolon-separated)...
data.to_csv("report-update.csv", sep = ";")

# Saving DataFrame object as Pickle file...
data.to_pickle("report-update.p")

In [None]:
# Data normalization using Sklearn...


import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import scale
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


# Loading data from CSV file ("/" placeholders are parsed as NaN values)...
data = pd.read_csv("/report.csv", na_values = "/")

# Creating imputers for handling NaN values...
simpleImputerMedian = SimpleImputer(strategy = "median")
simpleImputerMean = SimpleImputer(strategy = "mean")

# Creating MinMax scaler...
minMaxScaler = MinMaxScaler()

# Column groups that receive the same treatment. Every transformer below computes its
# statistics per column, so handling a group in one call gives the same values as
# handling each column separately, without reshaping to (n, 1) and back.
regularColumns = ["Ispit1", "Ispit2"]
popravniColumns = ["Ispit1_popravni", "Ispit2_popravni"]

# NOTE: the imputers and scalers are fitted on the whole table, i.e. before the
# train/test split at the end of this cell, so test rows contribute to the fitted
# statistics. For a real evaluation, fit them on the training part only.

# Replacing NaN values with "median" strategy...
data[regularColumns] = simpleImputerMedian.fit_transform(data[regularColumns])

# Replacing NaN values with "mean" strategy...
data[popravniColumns] = simpleImputerMean.fit_transform(data[popravniColumns])

# Normalizing values using Z-score normalization...
data[regularColumns] = scale(data[regularColumns])

# Normalizing values using MinMax scaler...
data[popravniColumns] = minMaxScaler.fit_transform(data[popravniColumns])

# Other NaN values will be 0...
data = data.replace(np.nan, 0)

# Separating the column with grades from the remaining columns...
grades = data["Ocjena"]
data = data.drop(columns = ["Ocjena"])

# Converting DataFrame object to NumPy array...
gradesNumPyArray = grades.to_numpy()
dataNumPyArray = data.to_numpy()

# Preparing for classification training (fixed random_state makes the split reproducible)...
xTrain, xTest, yTrain, yTest = train_test_split(dataNumPyArray, gradesNumPyArray, test_size = 0.2, random_state = 1)

In [None]:
# Classification of Iris flower using Sklearn...


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics


# Fetch the Iris dataset bundled with scikit-learn and unpack the parts used below.
iris = load_iris()
x, y = iris.data, iris.target
featureNames, targetNames = iris.feature_names, iris.target_names

# Show what the dataset looks like.
print(f"Feature names: {featureNames}")
print(f"Target names: {targetNames}")
print(f"Data examples: {x[:5]}")

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(
    x,
    y,
    test_size = 0.3,
    random_state = 1,
)

# Build the k-Nearest Neighbors classifier (k = 3) and train it; fit() returns the estimator itself.
knnClassifier = KNeighborsClassifier(n_neighbors = 3).fit(xTrain, yTrain)

# Predict the held-out samples and report the share of correct predictions.
yPredicted = knnClassifier.predict(xTest)
accuracy = metrics.accuracy_score(yTest, yPredicted)
print(f"Accuracy: {accuracy}")

In [None]:
# Deep learning using Keras (MNIST dataset)...


# Importing MNIST dataset...
from keras.datasets import mnist                                                                                                                                       
from keras import models
from keras import layers
from keras.utils import to_categorical


# Load MNIST, which arrives already split into training and testing parts.
(xTrain, yTrain), (xTest, yTest) = mnist.load_data()

# Report shape, sample count and memory footprint of the raw arrays.
print(f"Training set dimensions: {xTrain.shape}")
print(f"Training set number of elements: {xTrain.shape[0]}")
print(f"Training set size (bytes): {xTrain.size * xTrain.itemsize}")

print(f"Testing set dimensions: {xTest.shape}")
print(f"Testing set number of elements: {xTest.shape[0]}")
print(f"Testing set size (bytes): {xTest.size * xTest.itemsize}")

# Flatten every 28 x 28 image into a vector of 784 values and divide by 255
# after converting to float32.
trainImages = xTrain.reshape((xTrain.shape[0], 28 * 28)).astype("float32") / 255
testImages = xTest.reshape((xTest.shape[0], 28 * 28)).astype("float32") / 255

# Memory footprint after the conversion to float32.
print(f"Training set size (bytes): {trainImages.size * trainImages.itemsize}")
print(f"Testing set size (bytes): {testImages.size * testImages.itemsize}")

# One-hot encode the labels, as required by the categorical cross-entropy loss.
trainLabels = to_categorical(yTrain)
testLabels = to_categorical(yTest)

# Define the network: one hidden dense layer followed by a 10-way softmax output.
neuralNetworkModel = models.Sequential([
    layers.Dense(512, activation = "relu", input_shape = (28 * 28,)),
    layers.Dense(10, activation = "softmax"),
])

# Configure optimizer, loss and the metric reported during training and evaluation.
neuralNetworkModel.compile(
    optimizer = "rmsprop",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

# Train for 5 epochs with mini-batches of 128 samples.
neuralNetworkModel.fit(trainImages, trainLabels, epochs = 5, batch_size = 128)

# Evaluate the trained model on the testing set.
testLoss, testAccuracy = neuralNetworkModel.evaluate(testImages, testLabels)
print(f"Accuracy (on testing data): {testAccuracy}")

Training set dimensions: (60000, 28, 28)
Training set number of elements: 60000
Training set size (bytes): 47040000
Testing set dimensions: (10000, 28, 28)
Testing set number of elements: 10000
Testing set size (bytes): 7840000
Training set size (bytes): 188160000
Testing set size (bytes): 31360000
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy (on testing data): 0.9804999828338623
