PROGRAM 1 : Find-S

In [1]:
import csv
from google.colab import files

def create_csv_file(filename):
    data = [
        ['Weather', 'Temperature', 'Humidity', 'Wind', 'Water', 'Forecast', 'Play'],
        ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'yes'],
        ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'yes'],
        ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'no'],
        ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'yes']
    ]
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
        print(f"CSV file '{filename}' created successfully.")

def find_s_algorithm(filename, num_attributes):
    a = []
    print("\nThe given training data set\n")

    with open(filename, 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            a.append(row)
            print(row)

    print("\nThe initial value of hypothesis:")
    hypothesis = ['0'] * num_attributes
    print(hypothesis)

    for j in range(num_attributes):
        hypothesis[j] = a[1][j]

    print("\nFind S: Finding a maximally specific hypothesis\n")

    for i in range(1, len(a)):
        if a[i][num_attributes] == 'yes':
            for j in range(num_attributes):
                if a[i][j] != hypothesis[j]:
                    hypothesis[j] = '?'
                else:
                    hypothesis[j] = a[i][j]
            print(f"For training instance no:{i} the hypothesis is: {hypothesis}")

    print("\nThe maximally specific hypothesis for a given training examples:\n")
    print(hypothesis)

def main():
    filename = 'enjoysport.csv'
    num_attributes = 6
    create_csv_file(filename)
    find_s_algorithm(filename, num_attributes)
    files.download(filename)

main()


CSV file 'enjoysport.csv' created successfully.

The given training data set

['Weather', 'Temperature', 'Humidity', 'Wind', 'Water', 'Forecast', 'Play']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'no']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'yes']

The initial value of hypothesis:
['0', '0', '0', '0', '0', '0']

Find S: Finding a maximally specific hypothesis

For training instance no:1 the hypothesis is: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
For training instance no:2 the hypothesis is: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
For training instance no:4 the hypothesis is: ['Sunny', 'Warm', '?', 'Strong', '?', '?']

The maximally specific hypothesis for a given training examples:

['Sunny', 'Warm', '?', 'Strong', '?', '?']


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

PROGRAM 2 : Candidate Elimination

In [2]:
import pandas as pd

#step 1: create dataset
data = [
        ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'yes'],
        ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'yes'],
        ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'no'],
        ['Sunny', 'Warm', 'High', 'Low', 'Cool', 'Change', 'yes'],
]

#step 2: convert the data to the DataFrame
columns = ['Outlook','Temperature','Humidity','Wind','Play','Activity','Target']
df = pd.DataFrame(data, columns=columns)

#step 3: save the DataFrame to a CSV fie
df.to_csv('dataset.csv',index=False)

#step 4: Read the CSV file
df = pd.read_csv('dataset.csv')
concepts = df.values[:, :-1]
target = df.values[:, -1]

#step 5: Define the Candidate-Elimination algorithm
def learn(concpets,target):
  specific_h = concepts[0].copy() #initialize specific hypothesis
  general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]

  for i, h in enumerate(concepts):
    if target[i] == "yes":
      for x in range(len(specific_h)):
        if h[x] != specific_h[x]:
          specific_h[x] = '?'
          general_h[x][x] = '?'
    elif target[i] == "no":
      for x in range(len(specific_h)):
        if h[x] != specific_h[x]:
          general_h[x][x] = specific_h[x]
        else:
          general_h[x][x] = '?'

  #Remove all-? hypotheses from general hypotheses
  general_h = [gh for gh in general_h if gh != ['?' for _ in range(len(specific_h))]]

  return specific_h, general_h

#step 6: Run the learning process
s_final, g_final = learn(concepts, target)

#step 7: print the final specific and general hypotheses
print(f"Final S: {s_final}")
print(f"Final G: {g_final}")

Final S: ['Sunny' 'Warm' '?' '?' '?' '?']
Final G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]


PROGRAM 3 : ID3

In [None]:
from google.colab import drive
import pandas as pd
from math import log
from collections import Counter
from pprint import pprint

# mount google drive
drive.mount('/content/drive')

# copy path of .csv file
df_tennis = pd.read_csv('/content/prog3.csv')

def entropy(probs):
  return sum([-prob * log(prob, 2) for prob in probs])

def entropy_of_list(a_list):
  cnt = Counter(x for x in a_list)
  num_instances = len(a_list) * 1.0
  probs = [x/num_instances for x in cnt.values()]
  return entropy(probs)

def information_gain(df, split_attribute_name, target_attribute_name):
  df_split = df.groupby(split_attribute_name)
  nobs = len(df.index) * 1.0
  df_agg_ent = df_split.agg({target_attribute_name : [entropy_of_list, lambda x: len(x)/nobs]})[target_attribute_name]
  df_agg_ent.columns = ['Entropy','PropObservations']
  new_entropy = sum(df_agg_ent['Entropy'] * df_agg_ent['PropObservations'])
  old_entropy = entropy_of_list(df[target_attribute_name])
  return old_entropy - new_entropy

def id3(df, target_attribute_name, attribute_names, default_class=None):
  cnt = Counter(x for x in df[target_attribute_name])
  if len(cnt) == 1:
    return next(iter(cnt))
  elif df.empty or (not attribute_names):
    return default_class
  else:
    default_class = max(cnt.keys())
    gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names]
    index_of_max = gainz.index(max(gainz))
    best_attr = attribute_names[index_of_max]
    tree = {best_attr: {}}
    remaining_attribute_names = [i for i in attribute_names if i!=best_attr]

    for attr_val, data_subset in df.groupby(best_attr):
      subtree = id3(data_subset, target_attribute_name, remaining_attribute_names)
      tree[best_attr][attr_val] = subtree
  return tree

attribute_names = list(df_tennis.columns)
attribute_names.remove('PlayTennis')

tree = id3(df_tennis, 'PlayTennis', attribute_names)

print("\n\nThe Resultant Decision Tree is: \n")
pprint(tree)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The Resultant Decision Tree is:

{'Day': {'D1': 'No',
         'D10': 'Yes',
         'D11': 'Yes',
         'D12': 'Yes',
         'D13': 'Yes',
         'D14': 'No',
         'D2': 'No',
         'D3': 'Yes',
         'D4': 'Yes',
         'D5': 'Yes',
         'D6': 'No',
         'D7': 'Yes',
         'D8': 'No',
         'D9': 'Yes'}}

PROGRAM 4 : ANN

In [None]:
import numpy as np
#1.
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X/np.amax(X,axis=0) #normalize input data
y = y/100 #normalize output data!

#2.Activation functions
def sigmoid(x):
  return 1/(1 + np.exp(-x))
def derivatives_sigmoid(x):
  return x * (1 - x)

#3.Network structure and Hyperparameters => learning phase
epoch=1000
learning_rate = 0.6
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

#4.Weights initializaion (Weights->{wh,wo}, Biases->{bh,bo})
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wo=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bo=np.random.uniform(size=(1,output_neurons))

#5.Training the Network
for i in range(epoch):
  #Forward Propagation
  net_h = np.dot(X,wh) + bh
  sigma_h = sigmoid(net_h)
  net_o = np.dot(sigma_h,wo) + bo
  output = sigmoid(net_o)
  #Backpropagation =>(to reduce errors)
  deltaK = (y-output)*derivatives_sigmoid(output)
  deltaH = deltaK.dot(wo.T)*derivatives_sigmoid(sigma_h)
  wo = wo + sigma_h.T.dot(deltaK) * learning_rate
  wh = wh + X.T.dot(deltaH) * learning_rate
print(f"Input: \n {X}")
print(f"Actual Output: \n{y}")
print(f"Predicted Output: \n{output}")

Input: 
 [[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output: 
[[0.92]
 [0.86]
 [0.89]]
Predicted Output: 
[[0.89397188]
 [0.88468015]
 [0.89158982]]


PROGRAM 5 : Naive Bayes Text Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Sample dataset
data = {
  'text': [
    'I love programming in Python',
    'Python is an amazing language',
    'I hate getting errors in my code',
    'Debugging can be frustrating',
    'Machine learning is fascinating',
    'I dislike syntax errors'],
    'label': ['positive', 'positive', 'negative', 'negative', 'positive', 'negative']
}

#Create a DataFrame
df = pd.DataFrame(data)

#Convert text labels to numerical values
df['label'] = df['label'].map({'positive': 1, 'negative': 0})

# Split dataset into training and test sets with stratification
X_train, X_test, y_train, y_test = train_test_split( df['text'], df['label'], test_size=0.2, stratify=df['label'], random_state=42 )

# Convert text data into feature vectors
vectorizer = CountVectorizer(lowercase=True, stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

#Train Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

#Make predictions
y_pred = clf.predict(X_test_vec)

#Evaluate the model
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy of the classifier:", accuracy)

#Predict a single text
sample_text = ["I enjoy learning about artificial intelligence"]
sample_vec = vectorizer.transform(sample_text)
predicted_label = clf.predict(sample_vec)
print("Predicted label for sample text:", "positive" if predicted_label[0] == 1 else "negative")

Accuracy of the classifier: 1.0
Predicted label for sample text: positive


PROGRAM 6 :

In [None]:
def probAttr(data, attr, val):
    Total = data.shape[0]
    cnt = len(data[data[attr]==val])
    return cnt, cnt/Total

def train(data, Attr, conceptVals, concept):
    conceptProbs = {}
    countConcept = {}
    for cVal in conceptVals:
      countConcept[cVal], conceptProbs[cVal] = probAttr(data, concept, cVal)
    AttrConcept = {}
    probability_list = {}
    for att in Attr:
      probability_list[att] = {}
      AttrConcept[att] = {}
      for val in Attr[att]:
        AttrConcept[att][val] = {}
        a, probability_list[att][val] = probAttr(data, att, val)
        for cVal in conceptVals:
          dataTemp = data[data[att]==val]
          AttrConcept[att][val][cVal] = len(dataTemp[dataTemp[concept]==cVal])/countConcept[cVal]
    print(f"P(A) : {conceptProbs}\n") #Prior Probabilities of each concept
    print(f"P(X/A) : {AttrConcept}\n") #Attribute probability for each attr
    print(f"P(X) : {probability_list}\n") #conditional probability?
    return conceptProbs, AttrConcept, probability_list

def test(examples, Attr, concept_list, conceptProbs, AttrConcept, probability_list):
    misclassification_count = 0
    Total = len(examples)
    for ex in examples:
      px = {}
      for a in Attr:
        for x in ex:
          for c in concept_list:
            if x in AttrConcept[a]:
              if c not in px:
                px[c] = conceptProbs[c] * AttrConcept[a][x][c] / probability_list[a][x]
              else:
                px[c] = px[c] * AttrConcept[a][x][c] / probability_list[a][x]
      print(px)
      classification = max(px, key=px.get)
      print(f"Classification: {classification} Expected: {ex[-1]}")
      if (classification != ex[-1]):
        misclassification_count += 1
        misclassification_rate = misclassification_count*100/Total
        accuracy = 100 - misclassification_rate
    print(f"Misclassification Count: {misclassification_count}")
    print(f"Misclassification Rate: {misclassification_rate}")
    print(f"Accuracy = {accuracy}%")

import pandas as pd
df = pd.read_csv('/home/sahyadri/Desktop/ML/tennis.csv')
concept = str(list(df)[-1])
concept_list = set(df[concept])
Attr = {}
for a in df.columns[:-1]:
  Attr[a] = set(df[a])
  print(f"{a}  :{Attr[a]}")
conceptProbs, AttrConcept, probability_list = train(df, Attr, concept_list, concept)
examples = pd.read_csv('/home/sahyadri/Desktop/ML/tennis.csv')
test(examples.values, Attr, concept_list, conceptProbs, AttrConcept, probability_list)

outlook: {'sunny', 'overcast', 'rain'}
temperature: {'cool', 'mild', 'hot'}
humidity: {'normal', 'high'}
wind: {'strong', 'weak'}
P(A) : {'no': 0.35714285714285715, 'yes': 0.6428571428571429}

P(X/A) : {'outlook': {'sunny': {'no': 0.6, 'yes': 0.2222222222222222}, 'overcast': {'no': 0.0, 'yes': 0.4444444444444444}, 'rain': {'no': 0.4, 'yes': 0.3333333333333333}},
          'temperature': {'cool': {'no': 0.2, 'yes': 0.3333333333333333}, 'mild': {'no': 0.4, 'yes': 0.4444444444444444}, 'hot': {'no': 0.4, 'yes': 0.2222222222222222}},
          'humidity': {'normal': {'no': 0.2, 'yes': 0.6666666666666666}, 'high': {'no': 0.8, 'yes': 0.3333333333333333}},
          'wind': {'strong': {'no': 0.6, 'yes': 0.3333333333333333}, 'weak': {'no': 0.4, 'yes': 0.6666666666666666}}}

P(X) : {'outlook': {'sunny': 0.35714285714285715, 'overcast': 0.2857142857142857, 'rain': 0.35714285714285715},
        'temperature': {'cool': 0.2857142857142857, 'mild': 0.42857142857142855, 'hot': 0.2857142857142857},
        'humidity': {'normal': 0.5, 'high': 0.5},
        'wind': {'strong': 0.42857142857142855, 'weak': 0.5714285714285714}}

{'no': 0.9408000000000002, 'yes': 0.2419753086419753}
Classification : no Expected : no
{'no': 1.8816000000000002, 'yes': 0.16131687242798354}
Classification : no Expected : no
{'no': 0.0, 'yes': 0.6049382716049383}
Classification : yes Expected : yes

{'no': 0.4181333333333335, 'yes': 0.4839506172839506}
Classification : yes Expected : yes
{'no': 0.07840000000000004, 'yes': 1.0888888888888888}
Classification : yes Expected : yes

{'no': 0.15680000000000005, 'yes': 0.7259259259259259}
Classification : yes Expected : no
{'no': 0.0, 'yes': 1.2098765432098766}
Classification : yes Expected : yes


{'no': 0.6272000000000001, 'yes': 0.3226337448559671}
Classification : no Expected : no
{'no': 0.11760000000000002, 'yes': 0.7259259259259256}
Classification : yes Expected : yes
{'no': 0.10453333333333338, 'yes': 0.9679012345679012}
Classification : yes Expected : yes

{'no': 0.31360000000000005, 'yes': 0.43017832647462273}
Classification : yes Expected : yes
{'no': 0.0, 'yes': 0.5377229080932785}
Classification : yes Expected : yes
{'no': 0.0, 'yes': 1.2098765432098766}
Classification : yes Expected : yes

{'no': 0.8362666666666669, 'yes': 0.3226337448559671}
Classification : no Expected : no
Misclassification Count=1
Misclassification Rate=7.142857142857143%
Accuracy=92.85714285714286%

PROGRAM 7 : (Jupyter Notebook) NB

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("/home/sahyadri/Downloads/Heart_Disease (1).csv")

feature_col_names = df.columns[df.columns != 'CHDRisk']
predicted_class_names=['CHDRisk']

X = df[feature_col_names]
y=df[predicted_class_names]
label_encoder = LabelEncoder()

categorical_columns = X.select_dtypes(include = [object]).columns

for col in categorical_columns:
    X[col] = label_encoder.fit_transform(X[col])

y=label_encoder.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)
print('Total number of Training Data:',y_train.shape)
print('Total number of Test Data:',y_test.shape)

clf = GaussianNB()
clf.fit(X_train,y_train)
predicted = clf.predict(X_test)

accuracy = metrics.accuracy_score(y_test,predicted)
print('\n Accuracy of the cassifier:',accuracy)

conf_matrix = metrics.confusion_matrix(y_test,predicted)

plt.figure(figsize=(8,6))
sns.heatmap(conf_matrix,annot=True,fmt='d',cmap='Blues',xticklabels=['No disease','Heart Disease'],yticklabels=['No Disease','Heart Disease'])

plt.title('Confusion Matrix')
plt.ylabel('actual')
plt.xlabel('Predicted')
plt.show()

test_data=[[0,63,2,0,5,0,0,0,0,240,120,80,23.5,70,88]]
predict_test_data = clf.predict(test_data)
print('\n Predicted value for individual test data',predict_test_data)

PROGRAM 8: KNN

In [2]:
#import the required packages
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

#Lood dataset
iris = datasets.load_iris()
print("Iris Data set loaded...")

#Split the data into train and test samples
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)
print("Dataset is split into training and testing...")
print("Size of training data and its label", x_train.shape, y_train.shape)
print("Size of testing data and its label", x_test.shape, y_test.shape)

#Print Label numbers and their names
for i in range(len(iris.target_names)):
  print("Label", i, "-", str(iris.target_names[i]))

#Create an object of KNN classifier
classifier = KNeighborsClassifier(n_neighbors=1)

#Perform Training
classifier.fit(x_train, y_train)

#Perform testing
y_pred = classifier.predict(x_test)

#Display the results
print("Results of Classification using K-nn with K-1")
for r in range(0, len(x_test)):
  print("Sample:", str(x_test[r]), "Actual-label:", str(y_test[r]), "Predicted-label:", str(y_pred[r]))

#Print Classification Accuracy
print("Classification Accuracy:", classifier.score(x_test, y_test))

Iris Data set loaded...
Dataset is split into training and testing...
Size of training data and its label (135, 4) (135,)
Size of testing data and its label (15, 4) (15,)
Label 0 - setosa
Label 1 - versicolor
Label 2 - virginica
Results of Classification using K-nn with K-1
Sample: [6.4 3.1 5.5 1.8] Actual-label: 2 Predicted-label: 2
Sample: [5.2 3.5 1.5 0.2] Actual-label: 0 Predicted-label: 0
Sample: [5.8 2.7 3.9 1.2] Actual-label: 1 Predicted-label: 1
Sample: [7.  3.2 4.7 1.4] Actual-label: 1 Predicted-label: 1
Sample: [5.  3.5 1.6 0.6] Actual-label: 0 Predicted-label: 0
Sample: [6.7 3.3 5.7 2.5] Actual-label: 2 Predicted-label: 2
Sample: [4.7 3.2 1.3 0.2] Actual-label: 0 Predicted-label: 0
Sample: [7.7 2.6 6.9 2.3] Actual-label: 2 Predicted-label: 2
Sample: [6.5 3.  5.5 1.8] Actual-label: 2 Predicted-label: 2
Sample: [4.6 3.1 1.5 0.2] Actual-label: 0 Predicted-label: 0
Sample: [5.8 2.7 5.1 1.9] Actual-label: 2 Predicted-label: 2
Sample: [6.7 3.1 4.7 1.5] Actual-label: 1 Predicted-la