# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the avocado dataset (CSV in the working directory) into a DataFrame
df = pd.read_csv('avocado.csv')

# Preview the first few rows of the dataset
print(df.head())

# Column dtypes and non-null counts.
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called directly; wrapping it in print() would append a stray "None".
df.info()

# Summary statistics of numerical columns
print(df.describe())

# Histogram of AveragePrice (30 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['AveragePrice'], bins=30, kde=True, ax=ax)
ax.set_xlabel('Average Price')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Average Price')
plt.show()

# Box plot of AveragePrice, one box per value of the 'type' column
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='type', y='AveragePrice', ax=ax)
ax.set(
    xlabel='Type',
    ylabel='Average Price',
    title='Distribution of Average Price by Type',
)
plt.show()

# Correlation heatmap
# Correlation is only defined for numeric columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# when it meets them; this frame presumably still holds text columns here
# ('type' and 'region' are used as categorical axes elsewhere in this script,
# and 'Date' has not been parsed yet), so select the numeric columns first.
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(12, 8))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Time series analysis of Average Price
# Parse the 'Date' column and make it the index; df stays Date-indexed for
# any code that runs after this section.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# The frame also carries 'type' and 'region' columns, so presumably many rows
# share the same date. Plotting the raw column would connect every row in
# file order and produce a zigzag line, so plot the mean price per date
# instead (groupby also returns the dates in sorted order).
mean_price_by_date = df['AveragePrice'].groupby(level='Date').mean()

plt.figure(figsize=(12, 6))
mean_price_by_date.plot()
plt.xlabel('Date')
plt.ylabel('Average Price')
plt.title('Time Series Analysis of Average Price')
plt.show()

# Per-region box plots: one figure per measured column.
# Each entry is (column to plot, y-axis label, figure title).
region_panels = (
    ('AveragePrice', 'Average Price', 'Average Price by Region'),
    ('Total Volume', 'Total Volume', 'Total Volume by Region'),
)
for column, axis_label, heading in region_panels:
    plt.figure(figsize=(12, 6))
    sns.boxplot(data=df, x='region', y=column)
    plt.xlabel('Region')
    plt.ylabel(axis_label)
    plt.title(heading)
    plt.xticks(rotation=90)  # draw the region tick labels vertically
    plt.show()

# Bar chart of how many rows each region contributes
region_counts = df['region'].value_counts()
fig, ax = plt.subplots(figsize=(12, 6))
region_counts.plot(kind='bar', ax=ax)
ax.set_xlabel('Region')
ax.set_ylabel('Count')
ax.set_title('Distribution of Observations by Region')
plt.xticks(rotation=90)  # draw the region tick labels vertically
plt.show()
