<a href="https://colab.research.google.com/github/poornimakr2001/python/blob/ds/olapop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import necessary libraries
import pandas as pd


def format_section(title, frame):
    """Return *title* followed by the text rendering of *frame* on a new line.

    The title and the frame are joined into ONE string on purpose. Passing
    them to ``print`` as two separate arguments makes ``print`` insert its
    default separator (a single space) after the title's trailing newline,
    which pushes the DataFrame's header row one column out of line with the
    data rows beneath it.

    Args:
        title: Heading text for the section (may start with a newline to
            leave a blank line before the section).
        frame: Object to render below the heading; its ``str()`` form is used.

    Returns:
        The heading, a newline, and the rendered frame as a single string.
    """
    return f"{title}\n{frame}"


# Step 2: Create a sample dataset (simulating a sales data cube)
# Dimensions: Region, Product, Year.  Measure: Sales.
data = {
    'Region': ['North', 'North', 'East', 'East', 'South', 'South', 'West', 'West'],
    'Product': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'Year': [2020, 2020, 2021, 2021, 2020, 2020, 2021, 2021],
    'Sales': [1500, 2000, 1800, 2200, 1700, 1900, 1600, 2100],
}

# Step 3: Convert the data into a Pandas DataFrame
df = pd.DataFrame(data)

# Step 4: Perform OLAP Operations

# (1) SLICE: fix a single dimension to one value (Year == 2020).
# The boolean mask keeps the original row labels, so the index is 0, 1, 4, 5.
slice_df = df[df['Year'] == 2020]
print(format_section("Slice operation (Year = 2020):", slice_df))

# (2) DICE: restrict two dimensions at once
# (Region is 'North' or 'East') AND (Product is 'A').
# Each condition is parenthesised because `&` binds tighter than `==`.
dice_df = df[(df['Region'].isin(['North', 'East'])) & (df['Product'] == 'A')]
print(format_section(
    "\nDice operation (Region = 'North' or 'East' and Product = 'A'):",
    dice_df,
))

# (3) ROLL-UP: aggregate away Product and Year, leaving total Sales per Region.
# reset_index() turns the group key back into an ordinary column.
rollup_df = df.groupby('Region')['Sales'].sum().reset_index()
print(format_section("\nRoll-up operation (Sales by Region):", rollup_df))

# (4) DRILL-DOWN: Sales by Region and Year (breaking down the Region level by Year).
# NOTE: in this sample data every Region appears in exactly one Year, so the
# per-(Region, Year) totals are the same numbers as the roll-up totals.
drilldown_df = df.groupby(['Region', 'Year'])['Sales'].sum().reset_index()
print(format_section(
    "\nDrill-down operation (Sales by Region and Year):",
    drilldown_df,
))

# Step 5: Display the data
print(format_section("\nOriginal DataFrame:", df))


Slice operation (Year = 2020):
   Region Product  Year  Sales
0  North       A  2020   1500
1  North       B  2020   2000
4  South       A  2020   1700
5  South       B  2020   1900

Dice operation (Region = 'North' or 'East' and Product = 'A'):
   Region Product  Year  Sales
0  North       A  2020   1500
2   East       A  2021   1800

Roll-up operation (Sales by Region):
   Region  Sales
0   East   4000
1  North   3500
2  South   3600
3   West   3700

Drill-down operation (Sales by Region and Year):
   Region  Year  Sales
0   East  2021   4000
1  North  2020   3500
2  South  2020   3600
3   West  2021   3700

Original DataFrame:
   Region Product  Year  Sales
0  North       A  2020   1500
1  North       B  2020   2000
2   East       A  2021   1800
3   East       B  2021   2200
4  South       A  2020   1700
5  South       B  2020   1900
6   West       A  2021   1600
7   West       B  2021   2100
