**Step 1: Import Libraries**

In [40]:
import os
from datetime import datetime

import dash
import pandas as pd
import plotly.express as px
from dash import dcc, html
from dash.dependencies import Input, Output
from pymongo import MongoClient

**Step 2: Set Up the Dash Application**

In [41]:
# Create the Dash application object for the CSV-backed sales dashboard.
# The layout and the chart callback defined in later cells are attached to `app`.
app = dash.Dash(__name__)

**Step 3: Load the Data**

In [42]:
# Read the sales CSV from the working directory and preview its first five rows.
sales_data = pd.read_csv(filepath_or_buffer='sales_data_500.csv')
print(sales_data.head(n=5))

         date product_id  sales_amount store_location
0  2023-01-01       P005           678          Miami
1  2023-01-01       P003           508    Los Angeles
2  2023-01-01       P007           320          Miami
3  2023-01-01       P010           985        Chicago
4  2023-01-01       P003           655        Houston


**Step 4: Create the Layout for the Dashboard**

In [43]:
# Assemble the Sales Dashboard page from the CSV-backed `sales_data` frame:
# a title, a short description, two filter controls and two graph placeholders.
location_choices = sales_data['store_location'].unique()
amount_low = sales_data['sales_amount'].min()
amount_high = sales_data['sales_amount'].max()

# Dropdown: one option per distinct store location; the first one is preselected.
location_picker = html.Div(
    [
        html.Label("Select Store Location:"),
        dcc.Dropdown(
            id='store-location-dropdown',
            options=[{'label': name, 'value': name} for name in location_choices],
            value=location_choices[0],
        ),
    ],
    style={'width': '50%'},
)

# Range slider: spans the observed sales amounts and starts with the full range
# selected. Tick labels are placed every 100 units, counted from the minimum.
amount_ticks = {
    tick: str(tick)
    for tick in range(int(amount_low), int(amount_high) + 100, 100)
}
amount_picker = html.Div(
    [
        html.Label("Select Sales Range:"),
        dcc.RangeSlider(
            id='sales-slider',
            min=amount_low,
            max=amount_high,
            step=10,
            marks=amount_ticks,
            value=[amount_low, amount_high],
        ),
    ],
    style={'width': '50%'},
)

# Graph placeholders; their figures are supplied by the callback on `app`.
chart_area = html.Div([
    dcc.Graph(id='sales-bar-chart'),
    dcc.Graph(id='sales-line-chart'),
])

app.layout = html.Div([
    html.H1("Sales Dashboard", style={'textAlign': 'center'}),
    html.Div("Use the controls below to explore sales data interactively.", style={'textAlign': 'center'}),
    location_picker,
    amount_picker,
    chart_area,
])


**Step 5: Create the Callback Functions for Interactivity**

In [44]:
@app.callback(
    [Output('sales-bar-chart', 'figure'),
     Output('sales-line-chart', 'figure')],
    [Input('store-location-dropdown', 'value'),
     Input('sales-slider', 'value')]
)
def update_graphs(selected_location, sales_range):
    """Redraw both dashboard charts for the chosen store and sales-amount window.

    Args:
        selected_location: Value of the store-location dropdown.
        sales_range: Sequence from the range slider; element 0 is the lower
            bound and element 1 the upper bound (both inclusive).

    Returns:
        A ``(bar_figure, line_figure)`` pair. When no row of the module-level
        ``sales_data`` frame matches the filters, the same empty
        "No Data Available" figure is returned for both outputs.
    """
    lower_bound = sales_range[0]
    upper_bound = sales_range[1]

    # Row masks: the store must match and the amount must lie in the closed interval.
    at_location = sales_data['store_location'] == selected_location
    within_range = sales_data['sales_amount'].between(lower_bound, upper_bound)
    matching_rows = sales_data[at_location & within_range]

    # Nothing to plot: hand the same placeholder figure to both graphs.
    if matching_rows.empty:
        placeholder = px.bar(
            title="No Data Available",
            labels={'sales_amount': 'Sales Amount', 'store_location': 'Store Location'},
        )
        return placeholder, placeholder

    def summed_by(column):
        """Total `sales_amount` per distinct value of `column`, as a flat frame."""
        return matching_rows.groupby(column)['sales_amount'].sum().reset_index()

    # Bar chart: total sales per product.
    bar_chart = px.bar(
        summed_by('product_id'),
        x='product_id',
        y='sales_amount',
        title=f"Total Sales by Product in {selected_location}",
        labels={'product_id': 'Product ID', 'sales_amount': 'Sales Amount'},
    )

    # Line chart: total sales per date.
    line_chart = px.line(
        summed_by('date'),
        x='date',
        y='sales_amount',
        title=f"Sales Trends Over Time in {selected_location}",
        labels={'date': 'Date', 'sales_amount': 'Sales Amount'},
    )

    return bar_chart, line_chart


**Step 6: Run the App**

In [45]:
# Start the development server for the CSV-backed dashboard on port 8010.
if __name__ == '__main__':
    # `app.run_server` was removed in Dash 3 in favour of `app.run`. Prefer
    # `run` and only fall back to the old name on Dash releases that lack it.
    launch = getattr(app, 'run', None) or app.run_server
    launch(debug=True, port=8010)


**Week 2**

**Task 1: Connect to database**

In [46]:

# Read the MongoDB Atlas connection string from the environment so that the
# database username and password are never stored in the notebook itself.
connection_string = os.environ.get("MONGODB_URI")
if not connection_string:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB Atlas "
        "connection string before running this cell."
    )

# Connect to the MongoDB Atlas cluster
client = MongoClient(connection_string)

# Access the database and collection
db = client['sales_db']
sales_collection = db['sales']

# Read the CSV file
csv_url = "https://raw.githubusercontent.com/moussadbouk/AAI634O/main/sales_data_500.csv"
sales_data = pd.read_csv(csv_url)

# Convert the DataFrame to a list of per-row dicts and insert them into MongoDB.
# NOTE: insert_many does not check for existing documents, so every re-run of
# this cell adds another full copy of the CSV to the collection.
sales_json = sales_data.to_dict(orient='records')
result = sales_collection.insert_many(sales_json)

# Verify insertion
print(f"Inserted {len(result.inserted_ids)} records into the 'sales' collection.")

Inserted 500 records into the 'sales' collection.


**Task 2: Perform CRUD operations on MongoDB**

a) Create Operation

In [47]:
# Build 10 additional sales records from a shared field list and one tuple per
# record, then insert them into the sales collection in a single call.
record_fields = ("date", "product_id", "sales_amount", "store_location")
record_rows = [
    ("2025-01-21", "P501", 250, "New York"),
    ("2025-01-22", "P502", 400, "Los Angeles"),
    ("2025-01-23", "P503", 300, "Chicago"),
    ("2025-01-24", "P504", 150, "Houston"),
    ("2025-01-25", "P505", 600, "Seattle"),
    ("2025-01-26", "P506", 500, "Boston"),
    ("2025-01-27", "P507", 350, "Miami"),
    ("2025-01-28", "P508", 700, "San Francisco"),
    ("2025-01-29", "P509", 800, "Denver"),
    ("2025-01-30", "P510", 900, "Portland"),
]
new_sales_records = [dict(zip(record_fields, row)) for row in record_rows]

# Report how many documents the server accepted.
result = sales_collection.insert_many(new_sales_records)
print(f"Inserted {len(result.inserted_ids)} new sales records.")


Inserted 10 new sales records.


b) Read Operation

In [48]:
# Fetch at most ten documents from the sales collection and print each one.
# No sort is requested, so the ten shown are whichever the server returns first.
sales_records = sales_collection.find(limit=10)
print("First 10 sales records:")
for document in sales_records:
    print(document)


First 10 sales records:
{'_id': ObjectId('67909f2d3040f9b9821072ff'), 'date': '2025-01-21', 'product_id': 'P501', 'sales_amount': 600, 'store_location': 'New York'}
{'_id': ObjectId('67909f2d3040f9b982107300'), 'date': '2025-01-22', 'product_id': 'P502', 'sales_amount': 400, 'store_location': 'Los Angeles'}
{'_id': ObjectId('67909f2d3040f9b982107301'), 'date': '2025-01-23', 'product_id': 'P503', 'sales_amount': 300, 'store_location': 'Chicago'}
{'_id': ObjectId('67909f2d3040f9b982107302'), 'date': '2025-01-24', 'product_id': 'P504', 'sales_amount': 150, 'store_location': 'Houston'}
{'_id': ObjectId('67909f2d3040f9b982107303'), 'date': '2025-01-25', 'product_id': 'P505', 'sales_amount': 600, 'store_location': 'Seattle'}
{'_id': ObjectId('67909f2d3040f9b982107304'), 'date': '2025-01-26', 'product_id': 'P506', 'sales_amount': 500, 'store_location': 'Boston'}
{'_id': ObjectId('67909f2d3040f9b982107305'), 'date': '2025-01-27', 'product_id': 'P507', 'sales_amount': 350, 'store_location': 'Mi

c) Update Operation

In [49]:
# Add a fixed amount to the sales figure of one product.
# `update_one` changes only the first document that matches the filter.
target_product_id = "P501"
sales_increment = 50

update_query = {"product_id": target_product_id}
update_action = {"$inc": {"sales_amount": sales_increment}}
result = sales_collection.update_one(update_query, update_action)
print(f"Modified {result.modified_count} record(s).")


Modified 1 record(s).


d) Delete Operation

In [50]:
# Delete every record dated earlier than the cutoff. The records in this
# notebook store `date` as a 'YYYY-MM-DD' string, so `$lt` compares strings;
# that ordering matches chronological order for this format.
cutoff_date = "2023-01-02"
delete_query = {"date": {"$lt": cutoff_date}}
result = sales_collection.delete_many(delete_query)
print(f"Deleted {result.deleted_count} record(s).")


Deleted 32 record(s).


**Task 3: Visualize data from MongoDB using Dash**

In [51]:
# Initialize a second Dash app for the MongoDB-backed dashboard
app2 = dash.Dash(__name__)

# MongoDB connection. `MongoClient` is already imported in the notebook's first
# cell, so it is not re-imported here.
# The connection string is read from the environment so that the database
# username and password are never stored in the notebook itself.
connection_string = os.environ.get("MONGODB_URI")
if not connection_string:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB Atlas "
        "connection string before running this cell."
    )
client = MongoClient(connection_string)

# Access the 'sales_db' database and 'sales' collection
db = client['sales_db']
sales_collection = db['sales']

# Helper that loads the MongoDB 'sales' collection into pandas
def fetch_sales_data():
    """Return every document of the MongoDB 'sales' collection as a DataFrame.

    The collection is read through the module-level ``sales_collection``; each
    document becomes one row and each document field one column.
    """
    cursor = sales_collection.find()
    documents = [document for document in cursor]
    return pd.DataFrame(documents)


In [52]:
# Fetch sales data from MongoDB. A dedicated name is used on purpose: the first
# dashboard's callback reads the module-level `sales_data` frame at call time,
# so rebinding that name here would silently switch its data source.
mongo_sales_data = fetch_sales_data()

# Compute the control inputs once instead of re-scanning the frame per use.
mongo_locations = mongo_sales_data['store_location'].unique()
mongo_sales_min = mongo_sales_data['sales_amount'].min()
mongo_sales_max = mongo_sales_data['sales_amount'].max()

# Create the layout for the MongoDB-backed Sales Dashboard
app2.layout = html.Div([
    # Header
    html.H1("Sales Dashboard", style={'textAlign': 'center'}),

    # Description
    html.Div("Use the controls below to explore sales data interactively.", style={'textAlign': 'center'}),

    # Dropdown for selecting a store location
    html.Div([
        html.Label("Select Store Location:"),
        dcc.Dropdown(
            id='store-location-dropdown',
            options=[
                {'label': location, 'value': location}
                for location in mongo_locations
            ],
            value=mongo_locations[0],  # Default value: first location found
        ),
    ], style={'width': '50%'}),

    # Slider for selecting a sales range
    html.Div([
        html.Label("Select Sales Range:"),
        dcc.RangeSlider(
            id='sales-slider',
            min=mongo_sales_min,
            max=mongo_sales_max,
            step=10,
            # Tick labels every 100 units, counted from the minimum amount
            marks={
                tick: str(tick)
                for tick in range(int(mongo_sales_min), int(mongo_sales_max) + 100, 100)
            },
            value=[mongo_sales_min, mongo_sales_max]  # Default range: everything
        )
    ], style={'width': '50%'}),

    # Graphs
    html.Div([
        dcc.Graph(id='sales-bar-chart'),  # Bar chart placeholder
        dcc.Graph(id='sales-line-chart')  # Line chart placeholder
    ])
])


In [53]:
@app2.callback(
    [Output('sales-bar-chart', 'figure'),
     Output('sales-line-chart', 'figure')],
    [Input('store-location-dropdown', 'value'),
     Input('sales-slider', 'value')]
)
def update_graphs(selected_location, sales_range):
    """Rebuild both charts from a fresh MongoDB read for the selected filters.

    Args:
        selected_location: Value of the store-location dropdown.
        sales_range: Sequence from the range slider; element 0 is the lower
            bound and element 1 the upper bound (both inclusive).

    Returns:
        A ``(bar_chart, line_chart)`` pair of Plotly figures. When nothing
        matches the filters, or the collection does not provide the columns the
        charts need, the same empty "No Data Available" figure is returned for
        both outputs.
    """
    # Fetch the latest sales data on every interaction
    latest_sales_data = fetch_sales_data()

    # An empty collection produces a DataFrame without any columns, and indexing
    # a missing column raises KeyError. Treat that case as "no matching rows".
    required_columns = {'store_location', 'sales_amount', 'product_id', 'date'}
    if required_columns.issubset(latest_sales_data.columns):
        # Filter the dataset based on selected store location and sales range
        filtered_df = latest_sales_data[
            (latest_sales_data['store_location'] == selected_location) &
            (latest_sales_data['sales_amount'] >= sales_range[0]) &
            (latest_sales_data['sales_amount'] <= sales_range[1])
        ]
    else:
        filtered_df = latest_sales_data.iloc[0:0]

    # Handle empty dataset
    if filtered_df.empty:
        no_data_figure = px.bar(
            title="No Data Available",
            labels={'sales_amount': 'Sales Amount', 'store_location': 'Store Location'}
        )
        return no_data_figure, no_data_figure

    # --- Bar Chart: Total Sales by Product ---
    sales_by_product = (
        filtered_df.groupby('product_id')['sales_amount']
        .sum()
        .reset_index()
    )
    bar_chart = px.bar(
        sales_by_product,
        x='product_id',
        y='sales_amount',
        title=f"Total Sales by Product in {selected_location}",
        labels={'product_id': 'Product ID', 'sales_amount': 'Sales Amount'}
    )

    # --- Line Chart: Sales Trends Over Time ---
    sales_trends = (
        filtered_df.groupby('date')['sales_amount']
        .sum()
        .reset_index()
    )
    line_chart = px.line(
        sales_trends,
        x='date',
        y='sales_amount',
        title=f"Sales Trends Over Time in {selected_location}",
        labels={'date': 'Date', 'sales_amount': 'Sales Amount'}
    )

    return bar_chart, line_chart


In [55]:
# Start the development server for the MongoDB-backed dashboard on port 8020.
if __name__ == '__main__':
    # `app2.run_server` was removed in Dash 3 in favour of `app2.run`. Prefer
    # `run` and only fall back to the old name on Dash releases that lack it.
    launch = getattr(app2, 'run', None) or app2.run_server
    launch(debug=True, port=8020)
