**Numpy**

In [None]:
# Ex-1. Create two numpy arrays of size 4 X 5 and 5 X 4.

import numpy as np

# Four copies of the row 1..5 stacked on top of each other form the 4 x 5 array.
arr1 = np.array([[1, 2, 3, 4, 5]] * 4)
# Swapping the axes of a 4 x 5 array yields the required 5 x 4 array.
arr2 = np.transpose(arr1)

print(arr1)
print(arr2)

In [None]:
# Ex-2. Randomly initialize the above arrays

import numpy as np

# Fill a 4 x 5 array with random integers drawn from 0..99
arr1 = np.random.randint(0, 100, size=(4, 5))
# Fill a 5 x 4 array with random integers drawn from 0..199
arr2 = np.random.randint(0, 200, size=(5, 4))

print(arr1)
print(arr2)

In [None]:
# Ex-3. Perform matrix multiplication

import numpy as np

# A 2 x 3 and a 3 x 4 operand: the inner dimensions match, so the product is 2 x 4.
arr1 = np.random.randint(10, size=(2, 3))
arr2 = np.random.randint(10, size=(3, 4))
print(arr1)
print(arr2)

# '@' is the matrix-multiplication operator (np.matmul).
arr = arr1 @ arr2
print(arr)
# For 2-D operands like these, the dot product gives the same matrix.
arrd = arr1.dot(arr2)
print(arrd)

In [None]:
# Ex-4. Perform element-wise matrix multiplication

# Also called the 'Hadamard product' or 'Schur product':
# each element is multiplied by the element at the same position in the other matrix.

import numpy as np

arr1 = np.array([[1, 2],
                 [3, 4]])
arr2 = np.array([[4, 8],
                 [0, 5]])

# explicit function call
arrm = np.multiply(arr1, arr2)
print(arrm)
# the '*' operator on arrays performs the same element-wise multiplication
arr = arr1 * arr2
print(arr)

In [None]:
# Ex-5. Find mean, median of the first matrix

import numpy as np

arr = np.random.randint(100, size=(4, 5))
print(arr)

# axis=0 collapses the rows: one mean per column (5 values)
hmean = arr.mean(axis=0)
# axis=1 collapses the columns: one mean per row (4 values)
vmean = arr.mean(axis=1)
print(hmean, vmean)

# same two directions for the median
hmed = np.median(arr, axis=0)
vmed = np.median(arr, axis=1)
print(hmed, vmed)

In [None]:
# Ex-6. (i) Get the transpose of the matrix that you created.

import numpy as np

arr = np.random.randint(100, size=(4, 5))
print(arr)

# Two equivalent spellings of the transpose: the .T attribute ...
arr1 = arr.T
print(arr1)
# ... and the np.transpose() function
arr2 = np.transpose(arr)
print(arr2)
print()


# Ex-6. (ii) Find the determinant of a square matrix.

mat = np.random.randint(10, size=(2, 2))
print(mat)

# np.linalg.det() works in floating point, so the determinant of an integer
# matrix can come back slightly off a whole number; rounding removes that error.
det1 = np.round(np.linalg.det(mat))
print(det1)

In [None]:
# Ex-7. Obtain each row in the second column of the first array.

import numpy as np

arr = np.random.randint(100, size=(4, 5))
print(arr)
# arr[:, 1:2] keeps the second column as a 4 x 1 slice;
# transposing lays its entries out as a single 1 x 4 row
row = arr[:, 1:2].T
print(row)

In [None]:
# Ex-8. Convert Numeric entries(columns) of mtcars.csv to Mean Centered Version.

import numpy as np
import csv

# Read every row of the csv file as a list of strings.
# newline="" lets the csv module do its own newline handling, as its docs recommend.
with open("mtcars.csv", "r", newline="") as file:
    rows = list(csv.reader(file))

# Full table (heading row + car-name column included), kept for writing back.
# dtype=object stores each cell as an ordinary Python object; a fixed-width
# string array would silently cut off float texts longer than its widest cell.
data2 = np.array(rows, dtype=object)

# The numeric part is everything except the heading row and the car names.
data = data2[1:, 1:].astype(float)
# Column-wise mean, subtracted from every row in one broadcasted operation.
mean = np.mean(data, axis=0)
data = data - mean

# Put the centred values back into the full table.
data2[1:, 1:] = data

# Write the table to a new csv file.
with open("mtcars2.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(data2)

**NLTK**

Movie Reviews is a corpus of 2000 movie reviews: 1000 positive and 1000 negative.

In [None]:
import nltk
from nltk.corpus import movie_reviews
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
import matplotlib.pyplot as plt
import random
import re
import string
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# One-time corpus downloads (uncomment on first run):
# nltk.download('movie_reviews')
# nltk.download("stopwords")
# nltk.download("punkt")
# nltk.download("wordnet")
# nltk.download("omw-1.4")

# File ids of the reviews, split by sentiment label.
pos_rev = movie_reviews.fileids("pos")
neg_rev = movie_reviews.fileids("neg")
print("The number of positive reviews: ", len(pos_rev), " and their type is: ", type(pos_rev))
print("The number of negative reviews: ", len(neg_rev), " and their type is: ", type(neg_rev))
print()

# Pie chart of the class balance.
fig = plt.figure(figsize=(5, 5))
labels = ["Positive", "Negative"]
sizes = [len(pos_rev), len(neg_rev)]
colors = ["#66ff66", "#ff6666"]
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    startangle=45,
    shadow=False,
    autopct="%1.2f%%",
)
plt.axis("equal")
plt.show()
print()

# Optional: ANSI colour codes print a review in green (positive) or red (negative):
# print("\033[92m", movie_reviews.raw(fileids=pos_rev[random.randint(0, 100)]))
# print("\033[91m", movie_reviews.words(neg_rev[random.randint(0, 100)]))

# Pick one review at random from the first 101 positive ones and tokenize it.
positive_review = movie_reviews.raw(fileids=pos_rev[random.randint(0, 100)])
positive_review_tokens = word_tokenize(positive_review)
print("Tokenized positive review: ", positive_review_tokens)
print()

stopwords_english = stopwords.words("english")
print("Stop words: ", stopwords_english)
print()
print(string.punctuation)
print()

# Cleaning: keep only tokens that are neither stop words nor punctuation.
review_clean = [
    word
    for word in positive_review_tokens
    if not (word in stopwords_english or word in string.punctuation)
]
print("Cleaned review: ", review_clean)
print()

# Stemming reduces every cleaned token to its stem;
# the printed result shows many small imperfections.
stemmer = PorterStemmer()
review_stem = [stemmer.stem(word) for word in review_clean]
print("Stemmed review: ", review_stem)
print()

# Lemmatizing the same cleaned tokens is used to reduce those imperfections.
lemmatizer = WordNetLemmatizer()
review_lem = [lemmatizer.lemmatize(word) for word in review_clean]
print("Lemmatized review: ", review_lem)



**Pandas**

In [None]:
# Ex-1. Draw Scatter Plot between SepalLengthCm and SepalWidthCm for “Iris.csv” file with proper labelling.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("drive/MyDrive/iris.csv")

# One point per row of the dataset: sepal length on x, sepal width on y.
plt.scatter(data["sepal.length"], data["sepal.width"])
# The exercise asks for proper labelling, so name both axes and the figure.
plt.xlabel("Sepal length (cm)")
plt.ylabel("Sepal width (cm)")
plt.title("Iris: sepal length vs. sepal width")
plt.show()

In [None]:
# Ex-2. Draw Histogram of SepalLengthCm with proper labelling.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("drive/MyDrive/iris.csv")

# 'bins' is the number of equal-width intervals (bars) the value range is split into
plt.hist(data["sepal.length"], bins=20)
# The exercise asks for proper labelling, so name both axes and the figure.
plt.xlabel("Sepal length (cm)")
plt.ylabel("Number of samples")
plt.title("Iris: distribution of sepal length")
plt.show()

In [None]:
# Ex-3. Plot bar chart of Species.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("drive/MyDrive/iris.csv")

df = pd.DataFrame(data, columns=["variety"])
# Take labels and counts from the SAME value_counts() result so that every bar
# carries the label of the species it counts. unique() returns species in order
# of appearance while value_counts() sorts by frequency, so pairing the two
# separately can attach counts to the wrong species.
species_counts = df["variety"].value_counts()
labels = species_counts.index.to_numpy()
counts = species_counts.values
plt.bar(labels, counts, color="#FF6600")
plt.xlabel("Species")
plt.ylabel("Number of samples")
plt.title("Iris: samples per species")
plt.show()

In [None]:
# Ex-4. Count total null values for each column in this dataset.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("drive/MyDrive/iris.csv")

# isna() marks every missing cell with True; summing the booleans per column
# gives the number of missing values in that column.
null_counts = data.isna().sum()
print(null_counts)

In [None]:
# Ex-5. i) Print first 5 rows of SepalLengthCm. ii) Print from 5th row and onwards and entire column of iris.csv dataset.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("drive/MyDrive/iris.csv")

sepal_length = data["sepal.length"]

# i) .loc slices by label and includes the end label, so :4 covers labels 0..4
print(sepal_length.loc[:4])
print()
# ii) everything from label 5 onwards; raise the display limit so that
# pandas prints the rows in full instead of abbreviating them
pd.set_option("display.max_rows", 500)
print(sepal_length.loc[5:])

**Scikit Learn**

In [None]:
import numpy as np
# pyplot is the plotting interface; "import matplotlib as plt" would bind plt to
# the top-level package and override the pyplot alias used by the earlier cells.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the wine dataset bundled with scikit-learn and unpack its parts.
wine = datasets.load_wine()
X = wine.data
y = wine.target
feature_names = wine.feature_names
target_names = wine.target_names

print("Features: ", feature_names)
print("Targets: ", target_names)
print()
# Peek at the first five samples only, instead of dumping the whole array.
print(X[:5])

# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 1
)