In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the raw e-commerce transactions CSV (Kaggle input mount).
DATA_PATH = '/kaggle/input/ecommerce-data/data.csv'

# The file is read as ISO-8859-1 rather than pandas' default UTF-8.
df = pd.read_csv(DATA_PATH, encoding='ISO-8859-1')

# Quick overview of the freshly loaded frame: columns, dtypes, non-null counts.
df.info()

In [None]:
# Keep only rows with a strictly positive quantity and a strictly positive
# unit price.
valid_rows = (df['Quantity'] > 0) & (df['UnitPrice'] > 0)

# .copy() makes the result an independent frame. Later cells overwrite
# 'InvoiceDate' and add 'TotalAmount' on `df`; doing that on a filtered slice
# triggers pandas' SettingWithCopyWarning.
df = df[valid_rows].copy()

In [None]:
# Convert the InvoiceDate column to pandas datetimes (format is inferred by
# pandas); later cells resample on it and do date arithmetic with it.
invoice_timestamps = pd.to_datetime(df['InvoiceDate'])
df['InvoiceDate'] = invoice_timestamps

In [None]:
# Line-item revenue: quantity multiplied by unit price, element-wise.
df['TotalAmount'] = df['Quantity'].mul(df['UnitPrice'])

In [None]:
# Summary statistics of the frame after filtering and adding TotalAmount
# (uses pandas' default column selection for describe()).
df.describe()

In [None]:
# Re-inspect dtypes and non-null counts now that rows have been filtered,
# InvoiceDate has been converted and TotalAmount has been added.
df.info()

In [None]:
# Revenue per calendar month: index the rows by invoice timestamp and sum
# TotalAmount inside each monthly bin.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME' — confirm the target pandas version before switching.
monthly_sales = df.set_index('InvoiceDate').resample('M')['TotalAmount'].sum()

# The y-axis label promises millions, so scale the plotted values explicitly
# instead of relying on matplotlib's automatic 1e6 offset notation.
# `monthly_sales` itself stays in raw currency units.
monthly_sales_millions = monthly_sales / 1e6

fig, ax = plt.subplots(figsize=(12, 6))
monthly_sales_millions.plot(ax=ax, linewidth=2, color='green', marker='o')

ax.set_title('Динамика продаж по месяцам', fontsize=16)
ax.set_ylabel('Выручка (миллионы)', fontsize=12)
ax.grid(True)
plt.show()

In [None]:
# Total revenue per country, then the ten largest in descending order.
revenue_by_country = df.groupby('Country')['TotalAmount'].sum()
top_countries = revenue_by_country.sort_values(ascending=False).head(10)

# Horizontal bar chart: revenue on x, country on y.
# NOTE(review): newer seaborn versions warn when `palette` is passed without
# `hue` — confirm the installed seaborn version before changing the call.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=top_countries.values,
    y=top_countries.index,
    palette='viridis',
    ax=ax,
)

ax.set_title('Топ-10 стран по выручке')
ax.set_xlabel('Выручка')
plt.show()

In [None]:
# Reference date for recency: one day after the latest invoice in the data,
# so the most recently active customer gets Recency == 1 rather than 0.
snapshot_date = df['InvoiceDate'].max() + pd.Timedelta(days=1)

# Per-customer aggregates via named aggregation with built-in reducers
# (no per-group Python lambda, no in-place rename afterwards).
customer_stats = df.groupby('CustomerID').agg(
    LastPurchase=('InvoiceDate', 'max'),   # most recent invoice timestamp
    Frequency=('InvoiceNo', 'nunique'),    # number of distinct invoices
    Monetary=('TotalAmount', 'sum'),       # total revenue from the customer
)

# Recency = whole days between the snapshot date and the last purchase,
# computed in one vectorised subtraction. The helper LastPurchase column is
# dropped by selecting only the three RFM columns, in R-F-M order.
rfm = customer_stats.assign(
    Recency=(snapshot_date - customer_stats['LastPurchase']).dt.days
)[['Recency', 'Frequency', 'Monetary']]

rfm.head()