In [2]:


class_counts = {'Buys computer': 7, 'Does not buy computer': 5}
total_instances = sum(class_counts.values())

prior_probabilities = {}
for class_label, count in class_counts.items():
    prior_probabilities[class_label] = count / total_instances

print(prior_probabilities)

{'Buys computer': 0.5833333333333334, 'Does not buy computer': 0.4166666666666667}


In [4]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KernelDensity

# Read the data from the image
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(data)

# Calculate class conditional densities for Age and Income features
features = ['Age', 'Income']
classes = df['Buys computer'].unique()

for feature in features:
    print(f"Class Conditional Densities for Feature: {feature}\n")
    for class_label in classes:
        # Filter data for the specific class label
        data_class = df[df['Buys computer'] == class_label][feature]
        
        # Reshape the data to fit the KDE estimator
        data_class = data_class.values.reshape(-1, 1)
        
        # Fit KDE estimator
        kde = KernelDensity(kernel='gaussian').fit(data_class)
        
        # Generate sample data for which to evaluate the estimated densities
        x = pd.DataFrame({feature: range(min(df[feature]), max(df[feature]))})
        x_values = x[feature].values.reshape(-1, 1)
        
        # Evaluate the estimated densities for the sample data
        log_density = kde.score_samples(x_values)
        density = pd.Series(np.exp(log_density), index=x.index)
        
        print(f"Class: {class_label}\nDensity:\n{density}\n")


Class Conditional Densities for Feature: Age

Class: yes
Density:
0    1.510224e-01
1    1.627911e-01
2    1.689747e-01
3    1.627911e-01
4    1.510224e-01
5    8.213469e-02
6    1.804160e-02
7    1.477778e-03
8    4.461210e-05
9    4.955762e-07
dtype: float64

Class: no
Density:
0    0.030801
1    0.056634
2    0.061064
3    0.063936
4    0.068366
5    0.094199
6    0.118234
7    0.124429
8    0.124412
9    0.117680
dtype: float64

Class Conditional Densities for Feature: Income

Class: yes
Density:
0        0.000000e+00
1        0.000000e+00
2        0.000000e+00
3        0.000000e+00
4        0.000000e+00
             ...     
38995    4.955732e-07
38996    4.461008e-05
38997    1.477283e-03
38998    1.799699e-02
38999    8.065691e-02
Length: 39000, dtype: float64

Class: no
Density:
0        0.049868
1        0.030246
2        0.006749
3        0.000554
4        0.000017
           ...   
38995    0.000000
38996    0.000000
38997    0.000000
38998    0.000000
38999    0.000000
Leng

In [6]:
import pandas as pd
from scipy.stats import chi2_contingency
from scipy.stats import pearsonr, spearmanr

# Read the data from the dictionary
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(data)

# Calculate chi-square test for independence between 'Student' and 'Credit rating'
contingency_table = pd.crosstab(df['Student'], df['Credit rating'])
chi2_statistic, p_value, degrees_of_freedom, expected_frequencies = chi2_contingency(contingency_table)

print(f"Chi-square test result for 'Student' and 'Credit rating':")
print(f"Chi-square value: {chi2_statistic}, p-value: {p_value}")

# Calculate correlation for 'Age' and 'Income'
pearson_correlation, pearson_p_value = pearsonr(df['Age'], df['Income'])
spearman_correlation, spearman_p_value = spearmanr(df['Age'], df['Income'])

print(f"Pearson correlation between 'Age' and 'Income': {pearson_correlation}, p-value: {pearson_p_value}")
print(f"Spearman correlation between 'Age' and 'Income': {spearman_correlation}, p-value: {spearman_p_value}")

Chi-square test result for 'Student' and 'Credit rating':
Chi-square value: 2.5732142857142857, p-value: 0.4622047909810518
Pearson correlation between 'Age' and 'Income': -0.9965476878818288, p-value: 4.861402895225907e-11
Spearman correlation between 'Age' and 'Income': -1.0, p-value: 0.0


In [8]:
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Data
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Create DataFrame
df = pd.DataFrame(data)

# Features and target variable
X = df.drop('Buys computer', axis=1)
y = df['Buys computer']

# One-hot encode categorical columns
X_encoded = pd.get_dummies(X, columns=['Credit rating'], drop_first=True)

# Create Gaussian Naive Bayes model
gnb = GaussianNB()

# Perform cross-validation and get accuracy scores
cv_results = cross_validate(gnb, X_encoded, y, cv=5, scoring='accuracy', return_train_score=False)

# Fit the model on the entire dataset
gnb.fit(X_encoded, y)

# Get classification report
predictions = gnb.predict(X_encoded)
classification_rep = classification_report(y, predictions)

# Print results
print("Cross-Validation Accuracy Scores:", cv_results['test_score'])
print("Mean CV Accuracy:", cv_results['test_score'].mean())
print("\nClassification Report:\n", classification_rep)


Cross-Validation Accuracy Scores: [0.33333333 0.5        0.5        1.         1.        ]
Mean CV Accuracy: 0.6666666666666666

Classification Report:
               precision    recall  f1-score   support

          no       1.00      1.00      1.00         8
         yes       1.00      1.00      1.00         3

    accuracy                           1.00        11
   macro avg       1.00      1.00      1.00        11
weighted avg       1.00      1.00      1.00        11





In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Load your dataset
data = pd.read_excel('training (3).xlsx')

# Assuming 'Classification' is the target variable and you have prepared your features and target as X and y

# Separate target variable from features
X = data.drop('Classification', axis=1)  # Features
y = data['Classification']  # Target variable

# Handle categorical variables
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    X[col] = label_encoders[col].fit_transform(X[col])

# Handle missing values
imputer = SimpleImputer(strategy='mean')  # You can change the strategy based on your data
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Gaussian Naive Bayes model
nb_model = GaussianNB()

# Train the model
nb_model.fit(X_train, y_train)

# Make predictions
predictions = nb_model.predict(X_test)

# Evaluate the model
classification_rep = classification_report(y_test, predictions)
print("Classification Report:\n", classification_rep)


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       208
           1       1.00      1.00      1.00       131

    accuracy                           1.00       339
   macro avg       1.00      1.00      1.00       339
weighted avg       1.00      1.00      1.00       339

