<a href="https://colab.research.google.com/github/kssv23/Machine-Learning/blob/main/ML_assign_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder, StandardScaler

from google.colab import files
import io  # stdlib; used below to wrap the uploaded bytes for pandas

# Ask for the workbook through Colab's upload widget. The returned mapping is
# keyed by file name and holds the raw bytes of each uploaded file.
uploaded = files.upload()

# Default workbook name. It is used when that file is among the uploads, and
# it is also the on-disk fallback when nothing was uploaded.
file_name = 'Cancer_Data.xlsx'
if uploaded:
    if file_name not in uploaded:
        # A differently named workbook was uploaded: use the first upload
        # instead of failing on the hard-coded name.
        file_name = next(iter(uploaded))
    # Parse the bytes that were just uploaded rather than re-opening a path,
    # so an older copy left on disk by a previous run is never read by mistake.
    df = pd.read_excel(io.BytesIO(uploaded[file_name]))
else:
    # Upload dialog dismissed: rely on a workbook already present on disk.
    df = pd.read_excel(file_name)

# Quick look at the raw data before any preprocessing.
print("Dataset Head:")
print(df.head())

print("\nData Types Before Encoding:")
print(df.dtypes)

# Turn every object-typed column into integer codes. Each fitted encoder is
# kept in `label_encoders`, keyed by column name, so the code <-> label
# mapping stays available after the column has been overwritten.
label_encoders = {}
for column in df.columns:
    if df[column].dtype != 'object':
        continue  # non-object columns are left exactly as loaded
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

print("\nData Types After Encoding:")
print(df.dtypes)

# NOTE(review): missing values are only reported here, not imputed or
# dropped; confirm the dataset has none before it reaches the estimators.
print("\nMissing Values:")
print(df.isna().sum())

# Separate the target from the features. The 'diagnosis' column is the target
# when it exists; otherwise the last column is treated as the target and
# everything before it as features.
if 'diagnosis' not in df.columns:
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
else:
    y = df['diagnosis']
    X = df.drop(columns=['diagnosis'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def linear_regression_single_attribute(X_train, y_train, X_test, y_test, attribute_index):
    """Fit, report and plot a linear regression on a single feature column.

    The column at position ``attribute_index`` is taken from both feature
    frames, a ``LinearRegression`` is fitted on the training part, and the
    coefficient, intercept and test-set R^2 are printed. A scatter plot of
    the test points with the fitted line drawn over it is then shown.

    Args:
        X_train: Training features; must support ``.iloc`` and ``.columns``.
        y_train: Training target values.
        X_test: Test features with the same column layout as ``X_train``.
        y_test: Test target values.
        attribute_index: Positional index of the feature column to use.

    Returns:
        None. Results are printed and plotted.
    """
    # Selecting with a list keeps the result a one-column frame (2-D) rather
    # than a 1-D series.
    selected = [attribute_index]
    train_feature = X_train.iloc[:, selected]
    test_feature = X_test.iloc[:, selected]

    model = LinearRegression()
    model.fit(train_feature, y_train)
    predictions = model.predict(test_feature)

    print(f"\nSingle Attribute Linear Regression (Attribute Index {attribute_index}):")
    print("Coefficients:", model.coef_)
    print("Intercept:", model.intercept_)
    print("R^2 Score:", model.score(test_feature, y_test))

    # Look the column label up once; it is used for both title and x axis.
    feature_name = X_train.columns[attribute_index]

    plt.scatter(test_feature, y_test, color='blue')
    plt.plot(test_feature, predictions, color='red', linewidth=2)
    plt.title(f'Single Attribute Linear Regression\n(Feature: {feature_name})')
    plt.xlabel(feature_name)
    plt.ylabel('Diagnosis')
    plt.show()


# Run the single-feature model on the first feature column.
linear_regression_single_attribute(X_train, y_train, X_test, y_test, attribute_index=0)

def linear_regression_multiple_attributes(X_train, y_train, X_test, y_test):
    """Fit a linear regression on all feature columns and print its results.

    Args:
        X_train: Training features.
        y_train: Training target values.
        X_test: Test features with the same columns as ``X_train``.
        y_test: Test target values.

    Returns:
        None. Coefficients, intercept and test-set R^2 are printed.
    """
    lr = LinearRegression()
    lr.fit(X_train, y_train)

    print("\nMultiple Attributes Linear Regression Results:")
    print("Coefficients:", lr.coef_)
    print("Intercept:", lr.intercept_)
    # score() predicts on X_test internally, so no separate predict() pass is
    # made just to throw its result away.
    print("R^2 Score:", lr.score(X_test, y_test))


# Run the model on the full feature set.
linear_regression_multiple_attributes(X_train, y_train, X_test, y_test)

def kmeans_clustering(X, num_clusters=2):
    """Cluster the rows of ``X`` with KMeans and plot them in 2-D PCA space.

    The features are standardized with ``StandardScaler`` before clustering.
    The size of each cluster is printed, and the standardized data is
    projected onto its first two principal components for a scatter plot
    coloured by cluster.

    The function works only from its argument. It does not read or modify
    any module-level frame, so it can be called with any feature table. The
    caller decides where to store the returned labels.

    Args:
        X: Two-dimensional feature table (DataFrame or array-like).
        num_clusters: Number of clusters to form.

    Returns:
        Array with one integer cluster label per row of ``X``, in row order.
    """
    # Imported locally: the file's import header does not bring in PCA.
    from sklearn.decomposition import PCA

    X_scaled = StandardScaler().fit_transform(X)

    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    kmeans.fit(X_scaled)
    cluster_labels = kmeans.labels_

    print("\nKMeans Clustering Results:")
    # Count cluster sizes straight from the labels; naming the series
    # 'Cluster' keeps the printed header the same as a frame column would give.
    print(pd.Series(cluster_labels, name='Cluster').value_counts())

    # Two principal components are used purely for visualization.
    components = PCA(n_components=2).fit_transform(X_scaled)

    plt.figure(figsize=(8, 6))
    plt.scatter(components[:, 0], components[:, 1], c=cluster_labels, cmap='viridis')
    plt.title('KMeans Clustering (PCA Visualization)')
    plt.xlabel('PCA Component 1')
    plt.ylabel('PCA Component 2')
    plt.show()

    return cluster_labels


# X holds one row per row of df, so the labels can be stored on the frame
# positionally as a new 'Cluster' column.
df['Cluster'] = kmeans_clustering(X, num_clusters=2)
