# **Step-1 : Import Section**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# **Step-2 : Load the data**

In [None]:
# Read the Art and Craft Supplies dataset into a DataFrame.
df = pd.read_csv('dataset_1_Art and Craft Supplies.csv')

# Report lines collected while the analysis runs, seeded with the report title
# and its underline.
output = [
    "Exploratory Data Analysis of Art and Craft Supplies.csv\n",
    "=====================================\n",
]

# **Step-3 : Explore the data**

In [None]:
# Take the leading rows once, then echo them and record them in the report.
head_preview = df.head()
print(head_preview)
output.append(f"first few rows:\n{head_preview}\n")

In [None]:
# Print basic information about the dataset via DataFrame.info().
# NOTE: this summary is only printed; unlike the neighbouring exploration
# steps it is not appended to `output`.
df.info()

In [None]:
# Compute the summary statistics once, then echo them and record them.
summary_stats = df.describe()
print(summary_stats)
output.append(f"Summary Statistics:\n{summary_stats}\n")

# **Step-4 : Clean the data**

In [None]:
# Section heading for the data-cleaning part of the report. The heading ends
# right after the newline (no trailing ':'), so the next report entry starts
# on a clean line instead of being prefixed with a stray colon.
output.append("Data cleaning results:\n")

In [None]:
# Count the missing entries in each column, record them in the report and
# echo them.
missing_values = df.isna().sum()  # isna() is the same check as isnull()
output.append(f"Missing values:\n{missing_values}\n")
print(missing_values)

In [None]:
# Handle missing values: here every row containing a missing value is dropped
# (no filling is performed). The DataFrame is modified in place.
df.dropna(inplace=True)  # inplace=True mutates df instead of returning a copy

In [None]:
# Parse the 'Date' column into pandas datetime values and store the result
# back under the same column name.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [None]:
# Count the duplicated rows, record the count, then remove them in place.
duplicate_mask = df.duplicated()  # True for each row repeating an earlier one
duplicates = duplicate_mask.sum()
output.append(f"Number of Duplicates Dropped {duplicates}\n")
df.drop_duplicates(inplace=True)

# **Step-5 : Analyze the data**

In [None]:
# Total revenue from Art & Craft Supplies: the sum of the 'Revenue' column.
total_revenue = df['Revenue'].sum()

# Build the message once; the report entry additionally ends with a newline.
revenue_line = f"Total Revenue: ${total_revenue}"
output.append(revenue_line + "\n")
print(revenue_line)

In [None]:
# Average price for each customer type.
# (The variable keeps its "per_service" name because later cells refer to it.)
price_by_customer_type = df.groupby('Customer_Type')['Price']
average_price_per_service = price_by_customer_type.mean()
output.extend([
    "Average Price per Customer Type:\n",
    f"{average_price_per_service}\n",
])
print(average_price_per_service)

In [None]:
# Number of rows per region; each row is counted as one service.
region_column = df['Region']
total_services_by_region = region_column.value_counts()
output.extend([
    "Total Services Provided by Region:\n",
    f"{total_services_by_region}\n",
])
print(total_services_by_region)

In [None]:
# Total revenue by region.
# Sum the 'Revenue' column (the same column the overall total revenue is taken
# from) rather than 'Price', so the numbers match the "Total Revenue by Region"
# label they are reported under.
revenue_by_region = df.groupby('Region')['Revenue'].sum()
output.append("Total Revenue by Region:\n")
output.append(f"{revenue_by_region}\n")
print(revenue_by_region)

# **Step-6 : Visualize the data**

In [None]:
# Bar chart: number of services recorded for each region.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(total_services_by_region.index, total_services_by_region.values)
ax.set_title('Total Services Provided by Region')
ax.set_xlabel('Region')
ax.set_ylabel('Number of Services')
plt.xticks(rotation=45)  # tilt the region labels on the current axes
fig.savefig('total_services_by_region.png')  # save before showing
plt.show()

In [None]:
# Pie chart: each region's share of the total number of services.
plt.figure(figsize=(8, 8))
plt.pie(
    total_services_by_region.values,
    labels=total_services_by_region.index,
    autopct='%1.1f%%',  # label every wedge with its percentage, one decimal
)
plt.title('Distribution of Services by Region')  # spelling corrected
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.savefig('services_by_region_pie.png')  # save the figure
plt.show()

In [None]:
# Bar plot of the average price per customer type.
# `average_price_per_service` is computed by grouping on 'Customer_Type', so
# the title and x-axis label say "Customer Type". The output filename is left
# unchanged so anything that picks up the saved image keeps working.
plt.figure(figsize=(10, 6))
plt.bar(average_price_per_service.index, average_price_per_service.values)
plt.title('Average Price per Customer Type')
plt.xlabel('Customer Type')
plt.ylabel('Average Price')
plt.xticks(rotation=45)
plt.savefig('average_price_per_service.png')  # save the figure
plt.show()

In [None]:
# Pie chart: each region's share of `revenue_by_region`.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    revenue_by_region.values,
    labels=revenue_by_region.index,
    autopct='%1.1f%%',
)
ax.set_title('Revenue Distribution by Region')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
fig.savefig('revenue_distribution_by_region_pie.png')  # save the figure
plt.show()

# **Step-7 : Save output file**

In [None]:
# Write the collected report entries to disk. writelines() inserts no
# separators; the entries embed their own newlines.
# The encoding is pinned to UTF-8 so the file does not depend on the platform
# default and non-ASCII text in the data cannot raise UnicodeEncodeError.
with open('analysis_output.txt', 'w', encoding='utf-8') as file:
    file.writelines(output)