# In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data_path = '/content/laptop_cleaned2.csv'
laptop_data = pd.read_csv(data_path)

# Drop the leftover index column that was written out with the CSV.
laptop_data.drop(columns=['Unnamed: 0'], inplace=True)

# Missing values are filled by assigning the result back to the column.
# Calling `fillna(..., inplace=True)` on `laptop_data[col]` operates on a
# temporary Series: pandas 2.x warns about it, and with Copy-on-Write enabled
# the DataFrame is never updated, so the NaNs would silently survive.

# Graphics_GB is imputed first, so its median is taken over all loaded rows
# (before any rows are dropped below).
laptop_data['Graphics_GB'] = laptop_data['Graphics_GB'].fillna(
    laptop_data['Graphics_GB'].median()
)

# Rows without a processor generation are discarded rather than imputed.
laptop_data.dropna(subset=['Processor_gen'], inplace=True)

# The remaining columns use medians computed over the rows that are left.
for column in ('Core_per_processor', 'Threads'):
    laptop_data[column] = laptop_data[column].fillna(laptop_data[column].median())

# Create individual plots to save as separate PNG files

# Brand distribution: one horizontal bar per brand, ordered by how often
# each brand occurs in the data.
brand_order = laptop_data['Brand'].value_counts().index
brand_fig, brand_ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=laptop_data, y='Brand', order=brand_order, ax=brand_ax)
brand_ax.set_title('Brand Distribution')
brand_ax.set_xlabel('Count')
brand_ax.set_ylabel('Brand')
brand_fig.savefig('brand_distribution.png')  # written to the working directory
plt.close(brand_fig)  # release the figure once it is on disk

# Operating System distribution: one horizontal bar per operating system,
# ordered by how often each one occurs in the data.
os_order = laptop_data['Operating_system'].value_counts().index
os_fig, os_ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=laptop_data, y='Operating_system', order=os_order, ax=os_ax)
os_ax.set_title('Operating System Distribution')
os_ax.set_xlabel('Count')
os_ax.set_ylabel('Operating System')
os_fig.savefig('operating_system_distribution.png')  # written to the working directory
plt.close(os_fig)  # release the figure once it is on disk

# Price vs. Rating: one point per laptop, price on x and rating on y.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=laptop_data, x='Price', y='Rating', ax=scatter_ax)
scatter_ax.set_title('Price vs. Rating')
scatter_ax.set_xlabel('Price')
scatter_ax.set_ylabel('Rating')
scatter_fig.savefig('price_vs_rating.png')  # written to the working directory
plt.close(scatter_fig)  # release the figure once it is on disk

# Correlation matrix of numerical features
# Only int64/float64 columns take part; other dtypes are left out of the
# correlation.
numerical_features = laptop_data.select_dtypes(include=['int64', 'float64'])
corr_matrix = numerical_features.corr()

# Annotated heatmap with each coefficient printed to two decimals.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.savefig('correlation_matrix.png')  # Save the figure
plt.close()

# NOTE: every figure above has been saved and closed, so nothing further is
# saved here; another savefig call would write an empty figure over an
# existing PNG.
