In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import csv

def classify(data, test):
    total_size = data.shape[0]
    print("\nTraining data size:", total_size)
    print("Test data size:", test.shape[0])

    count_yes = np.sum(data[:, -1] == 'yes')
    count_no = total_size - count_yes
    prob_yes = count_yes / total_size
    prob_no = count_no / total_size

    print("\nTarget Count Probability")
    print(f'Yes\t{count_yes}\t{prob_yes}')
    print(f'No\t{count_no}\t{prob_no}')

    accuracy = 0
    print("\nInstance Prediction Target")
    for t in range(test.shape[0]):
        prob0 = np.zeros(test.shape[1] - 1)
        prob1 = np.zeros(test.shape[1] - 1)
        
        for k in range(test.shape[1] - 1):
            count0 = np.sum((data[:, k] == test[t, k]) & (data[:, -1] == 'no'))
            count1 = np.sum((data[:, k] == test[t, k]) & (data[:, -1] == 'yes'))
            prob0[k] = count0 / count_no
            prob1[k] = count1 / count_yes

        prob_no_combined = prob_no * np.prod(prob0)
        prob_yes_combined = prob_yes * np.prod(prob1)
        
        prediction = 'yes' if prob_yes_combined > prob_no_combined else 'no'
        print(f'{t + 1}\t{prediction}\t{test[t, -1]}')

        if prediction == test[t, -1]:
            accuracy += 1

    final_accuracy = (accuracy / test.shape[0]) * 100
    print(f"Accuracy: {final_accuracy}%")

# Read data
data = pd.read_csv("3-dataset.csv")
metadata = data.columns.tolist()
print("The attribute names of training data:", metadata)

# Split dataset
split_ratio = 0.6
train_data, test_data = train_test_split(data, test_size=1 - split_ratio)

print("\nThe training data set is:")
print(train_data)

print("\nThe test data set is:")
print(test_data)

training = train_data.values
testing = test_data.values

classify(training, testing)

The attribute names of training data: ['weather', 'temperature', 'humidity', 'wind', 'play football']

The training data set is:
   weather temperature humidity    wind play football
4     rain        cool   normal    weak           yes
13    rain        mild     high  strong            no
9     rain        mild   normal    weak           yes
0    sunny         hot     high    weak            no
12  cloudy         hot   normal    weak           yes
1    sunny         hot     high  strong            no
2   cloudy         hot     high    weak           yes
5     rain        cool   normal  strong            no

The test data set is:
   weather temperature humidity    wind play football
11  cloudy        mild     high  strong           yes
6   cloudy        cool   normal  strong           yes
7    sunny        mild     high    weak            no
8    sunny        cool   normal    weak           yes
10   sunny        mild   normal  strong           yes
3     rain        mild     high    wea

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load data
data = pd.read_csv("3-dataset.csv").replace('?', np.nan)
metadata = data.columns.tolist()
print("Attributes:", metadata)

# Split data
train_data, test_data = train_test_split(data, test_size=0.4, random_state=42)
print("\nTraining Data:\n", train_data)
print("\nTest Data:\n", test_data)

def classify(train, test):
    total_size = len(train)
    count_yes = np.sum(train.iloc[:, -1] == 'yes')
    count_no = total_size - count_yes
    prob_yes, prob_no = count_yes / total_size, count_no / total_size
    
    print(f"\nTraining Size: {total_size}, Test Size: {len(test)}")
    print(f"Yes: {count_yes} ({prob_yes}), No: {count_no} ({prob_no})")
    
    correct_predictions = 0
    print("\nInstance Prediction Target")
    for i, test_instance in test.iterrows():
        prob0 = (train.iloc[:, :-1][train.iloc[:, -1] == 'no'] == test_instance[:-1]).mean()
        prob1 = (train.iloc[:, :-1][train.iloc[:, -1] == 'yes'] == test_instance[:-1]).mean()
        prob_no_combined = prob_no * prob0.prod()
        prob_yes_combined = prob_yes * prob1.prod()
        
        prediction = 'yes' if prob_yes_combined > prob_no_combined else 'no'
        correct_predictions += (prediction == test_instance[-1])
        print(f"{i + 1}\t{prediction}\t{test_instance[-1]}")
    
    accuracy = correct_predictions / len(test) * 100
    print(f"Accuracy: {accuracy}%")

classify(train_data, test_data)


Attributes: ['weather', 'temperature', 'humidity', 'wind', 'play football']

Training Data:
    weather temperature humidity    wind play football
2   cloudy         hot     high    weak           yes
1    sunny         hot     high  strong            no
13    rain        mild     high  strong            no
4     rain        cool   normal    weak           yes
7    sunny        mild     high    weak            no
10   sunny        mild   normal  strong           yes
3     rain        mild     high    weak           yes
6   cloudy        cool   normal  strong           yes

Test Data:
    weather temperature humidity    wind play football
9     rain        mild   normal    weak           yes
11  cloudy        mild     high  strong           yes
0    sunny         hot     high    weak            no
12  cloudy         hot   normal    weak           yes
5     rain        cool   normal  strong            no
8    sunny        cool   normal    weak           yes

Training Size: 8, Test Size: 

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and split data
data = pd.read_csv("3-dataset.csv").replace('?', np.nan)
train, test = train_test_split(data, test_size=0.4, random_state=42)

def classify(train, test):
    total = len(train)
    yes_count = (train.iloc[:, -1] == 'yes').sum()
    no_count = total - yes_count
    prob_yes, prob_no = yes_count / total, no_count / total

    print(f"\nTraining Size: {total}, Test Size: {len(test)}")
    print(f"Yes: {yes_count} ({prob_yes}), No: {no_count} ({prob_no})")

    correct = 0
    print("\nInstance Prediction Target")
    for i, instance in test.iterrows():
        prob0 = (train[train.iloc[:, -1] == 'no'].iloc[:, :-1] == instance[:-1]).mean()
        prob1 = (train[train.iloc[:, -1] == 'yes'].iloc[:, :-1] == instance[:-1]).mean()
        prob_no_combined = prob_no * prob0.prod()
        prob_yes_combined = prob_yes * prob1.prod()
        prediction = 'yes' if prob_yes_combined > prob_no_combined else 'no'
        correct += (prediction == instance[-1])
        print(f"{i + 1}\t{prediction}\t{instance[-1]}")
    
    accuracy = correct / len(test) * 100
    print(f"Accuracy: {accuracy}%")

classify(train, test)



Training Size: 8, Test Size: 6
Yes: 5 (0.625), No: 3 (0.375)

Instance Prediction Target
10	yes	yes
12	yes	yes
1	no	no
13	yes	yes
6	yes	no
9	yes	yes
Accuracy: 83.33333333333334%


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and split data
df = pd.read_csv("3-dataset.csv")
train, test = train_test_split(df, test_size=0.4, random_state=42)

def classify(train, test):
    total = len(train)
    yes_cnt = (train.iloc[:, -1] == 'yes').sum()
    no_cnt = total - yes_cnt
    p_yes, p_no = yes_cnt / total, no_cnt / total

    print(f"\nTrain Size: {total}, Test Size: {len(test)}")
    print(f"Yes: {yes_cnt} ({p_yes}), No: {no_cnt} ({p_no})")

    correct = 0
    print("\nInstance Prediction Target")
    for i, inst in test.iterrows():
        p0 = (train[train.iloc[:, -1] == 'no'].iloc[:, :-1] == inst[:-1]).mean()
        p1 = (train[train.iloc[:, -1] == 'yes'].iloc[:, :-1] == inst[:-1]).mean()
        p_no_comb = p_no * p0.prod()
        p_yes_comb = p_yes * p1.prod()
        pred = 'yes' if p_yes_comb > p_no_comb else 'no'
        correct += (pred == inst[-1])
        print(f"{i + 1}\t{pred}\t{inst[-1]}")
    
    acc = correct / len(test) * 100
    print(f"Accuracy: {acc}%")

classify(train, test)



Train Size: 8, Test Size: 6
Yes: 5 (0.625), No: 3 (0.375)

Instance Prediction Target
10	yes	yes
12	yes	yes
1	no	no
13	yes	yes
6	yes	no
9	yes	yes
Accuracy: 83.33333333333334%


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("3-dataset.csv")

In [33]:
train,test = train_test_split(df,test_size=0.2,random_state=42)

In [34]:
train

Unnamed: 0,weather,temperature,humidity,wind,play football
12,cloudy,hot,normal,weak,yes
5,rain,cool,normal,strong,no
8,sunny,cool,normal,weak,yes
2,cloudy,hot,high,weak,yes
1,sunny,hot,high,strong,no
13,rain,mild,high,strong,no
4,rain,cool,normal,weak,yes
7,sunny,mild,high,weak,no
10,sunny,mild,normal,strong,yes
3,rain,mild,high,weak,yes


In [35]:
test

Unnamed: 0,weather,temperature,humidity,wind,play football
9,rain,mild,normal,weak,yes
11,cloudy,mild,high,strong,yes
0,sunny,hot,high,weak,no


In [37]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv("3-dataset.csv")
train,test = train_test_split(df,test_size=0.2,random_state=42)

def classify(train,test):
    total = len(train)
    yes_cnt = (train.iloc[:,-1]=='yes').sum()
    no_cnt = total - yes_cnt
    p_yes,p_no = yes_cnt/total , no_cnt / total
    print(f'train size:{total} , test size:{len(test)}')
    print(f'yes:{yes_cnt} ({p_yes}) , no: {no_cnt} ({p_no})')
    correct = 0
    for i , inst in test.iterrows():
        po = (train[train.iloc[:,-1]=='no'].iloc[:,:-1]==inst[:-1]).mean()
        p1 = (train[train.iloc[:,-1]=='yes'].iloc[:,:-1]==inst[:-1]).mean()
        p_no_comb = p_no * po.prod()
        p_yes_comb = p_yes * p1.prod()
        pred = 'yes' if p_no_comb >p_yes_comb else 'no'
        correct += (pred == inst[-1])
        print(f'{i+1}\t{pred}\t {inst[-1]}')
    acc  = correct/len(test)*100
    print("accuracy",acc)
classify(train,test)

train size:11 , test size:3
yes:7 (0.6363636363636364) , no: 4 (0.36363636363636365)
10	no	 yes
12	no	 yes
1	no	 no
accuracy 33.33333333333333


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv("3-dataset.csv")
train,test = train_test_split(df,test_size=0.2,random_state=42)

def classify(train,test):
    total =
    yes_cnt =
    no_cnt = 
    

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [16]:
data = pd.read_csv("3-dataset.csv")
train, test = train_test_split(data,test_size=4,random_state = 42)

In [17]:
def classify(train,test):
    total = len(train)
    yes_count = (train.iloc[:,-1]=='yes').sum()
    no_count = total - yes_count
    prob_yes,prob_no = yes_count / total , no_count/total
    
    print(f'training size {total} test size {len(test)}')
    
    print(f'yes count {yes_count} ({prob_yes}) no_count {no_count} ({prob_no})')
    
    correct  = 0
    print("instances prediction target")
    
    for i,instance in test.iterrows():
        prob0 = (train[train.iloc[:,-1]=='no'].iloc[:,:-1] == instance[:-1]).mean()
        prob1 = (train[train.iloc[:,-1]=='yes'].iloc[:,:-1] == instance[:-1]).mean()
        
        no_prob_combined = prob_no*prob0.prod()
        
        yes_prob_combined = prob_yes*prob1.prod()
        
        prediction = 'yes' if yes_prob_combined > no_prob_combined else 'no'
        
        correct += (prediction == instance[-1])
        
        print(f'{i+1}\t{prediction}\t{instance[-1]}')
    accuracy = correct/len(test)*100
    print(f'accuracy{accuracy}')
classify(train,test)

training size 10 test size 4
yes count 6 (0.6) no_count 4 (0.4)
instances prediction target
10	yes	yes
12	yes	yes
1	no	no
13	yes	yes
accuracy100.0
