Load the dataset from a CSV file.

In [None]:
import pandas as pd
# Read the raw dataset into a DataFrame; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

Generate an overview of the data including the first few rows, info, and descriptive statistics.

In [None]:
# Quick look at the data: leading rows, schema summary, and summary statistics.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
df.info()
print(df.describe())

Identify numeric and categorical columns in the dataset.

In [None]:
# Partition the column names by dtype: numeric dtypes on one side,
# object dtype (typically text) on the other.
numeric_cols = list(df.select_dtypes(include='number').columns)
categorical_cols = list(df.select_dtypes(include='object').columns)

Create a correlation matrix heatmap to visualize relationships between numeric variables.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Correlation is only defined for numeric data. Restrict the frame to numeric
# columns first: on pandas >= 2.0, DataFrame.corr() no longer drops
# non-numeric columns silently and raises when it meets text values.
corr_matrix = df.select_dtypes(include='number').corr()
# annot=True writes each coefficient inside its heatmap cell.
sns.heatmap(corr_matrix, annot=True)
plt.show()

Get value counts for each categorical column.

In [None]:
# Tally every categorical column separately. apply() lines the per-column
# counts up on the union of observed values, so a value that does not occur
# in a given column shows up as NaN there.
value_counts = df[categorical_cols].apply(lambda column: column.value_counts())
print(value_counts)

Create a pie chart to show the distribution of records by country.

In [None]:
# Pie chart of the number of records per country; each slice is labelled
# with its percentage share to one decimal place.
df['Country'].value_counts().plot(kind='pie', autopct='%1.1f%%')
plt.title('Country Distribution')
plt.show()

Generate histograms, boxplots, and KDEs for numeric columns.

In [None]:
# Draw the three views side by side. Calling the seaborn functions back to
# back without an explicit target puts all of them on the same Axes, so the
# histogram, boxplot and KDE would be painted on top of each other.
fig, (hist_ax, box_ax, kde_ax) = plt.subplots(1, 3, figsize=(18, 5))

sns.histplot(data=df[numeric_cols], ax=hist_ax)
hist_ax.set_title('Histogram')

sns.boxplot(data=df[numeric_cols], ax=box_ax)
box_ax.set_title('Boxplot')

sns.kdeplot(data=df[numeric_cols], ax=kde_ax)
kde_ax.set_title('KDE')

# Keep the titles and tick labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()

Perform aggregated price analysis by source.

In [None]:
# Average price per source, with the group key turned back into a regular column.
aggregated_price = df.groupby('Source')['Price'].mean().reset_index()
print(aggregated_price)

Analyze price by country.

In [None]:
# Average price per country as a flat two-column table.
price_by_country = (
    df.groupby('Country')['Price']
    .aggregate('mean')
    .reset_index()
)
print(price_by_country)

Filter the dataset down to new customers (rows where `CustomerType` is `'New'`).

In [None]:
# Keep only the rows whose CustomerType is exactly 'New'.
new_customers = df.loc[df['CustomerType'].eq('New')]
print(new_customers)

Compute the average price per segment among new customers.

In [None]:
# Mean price within each segment, computed over new customers only.
customer_segments = (
    new_customers.groupby('Segment')['Price']
    .mean()
    .reset_index()
)
print(customer_segments)

Compute the mean price and the number of records for each customer (`CustomerID`) among new customers.

In [None]:
# One row per CustomerID among new customers: the mean of their prices and
# how many non-null prices they have.
customer_level_analysis = (
    new_customers.groupby('CustomerID')['Price']
    .aggregate(['mean', 'count'])
    .reset_index()
)
print(customer_level_analysis)

Visualize the price distribution by customer segments.

In [None]:
# Histogram of new-customer prices, with one colour per segment.
sns.histplot(
    data=new_customers,
    x='Price',
    hue='Segment',
)
plt.gca().set_title('Price Distribution by Segment')
plt.show()

Train a logistic regression model on the numeric columns to predict the `Target` column, holding out 20% of the rows for testing.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Never feed the label to the model as a feature: if 'Target' is numeric it is
# part of numeric_cols, which would leak the answer into the inputs.
feature_cols = [col for col in numeric_cols if col != 'Target']

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and therefore the results) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_cols],
    df['Target'],
    test_size=0.2,
    random_state=42,
)

# NOTE(review): LogisticRegression rejects NaN inputs; this assumes the
# numeric columns have no missing values. Confirm, or impute beforehand.
model = LogisticRegression()
model.fit(X_train, y_train)

Use the trained model to predict `Target` for the held-out test rows.

In [None]:
# Predict a label for every row of the held-out test split and show the raw array.
predictions = model.predict(X=X_test)
print(predictions)