# Day 9: Instagram Stories Daily User Creation Patterns

You are a Product Analyst on the Instagram Stories team investigating story creation patterns. The team wants to understand the distribution of stories created by users daily. You will analyze user storytelling behavior to optimize engagement strategies.

In [None]:
import pandas as pd
import numpy as np

# Raw story-creation log: one dict per record with the keys
# "user_id", "story_date" and "story_count".
# The data is intentionally messy (mixed date layouts, stray whitespace and
# casing in user ids, exact duplicate rows, missing values) so that the
# questions below have something to clean.
# Missing values are written as None: the JSON spelling `null` is not a
# Python name and raises NameError as soon as the cell runs.
stories_data_data = [
    {"user_id": "user_001", "story_date": "2024-07-03", "story_count": 3},
    {"user_id": "user_001", "story_date": "2024-07-03", "story_count": 3},
    {"user_id": "user_001", "story_date": "2024-08-15", "story_count": 5},
    {"user_id": "user_001", "story_date": "2024-09-10", "story_count": 0},
    {"user_id": "user_001", "story_date": "2024-10-05", "story_count": 20},
    {"user_id": "user_001", "story_date": "07/15/2024", "story_count": 2},
    {"user_id": "user_002", "story_date": "2024-07-03", "story_count": 4},
    {"user_id": " user_002", "story_date": "2024-07-04", "story_count": 3},
    {"user_id": "user_002", "story_date": None, "story_count": 6},
    {"user_id": "user_002", "story_date": "2024-12-25", "story_count": 1},
    {"user_id": "user_002", "story_date": "2025-01-15", "story_count": 7},
    {"user_id": "user_002", "story_date": "2025-06-29", "story_count": 10},
    {"user_id": "user_003", "story_date": "2024-07-10", "story_count": 2},
    {"user_id": "user_003", "story_date": "2024-08-20", "story_count": 8},
    {"user_id": "user_003", "story_date": "2024-08-20", "story_count": 8},
    {"user_id": "user_003", "story_date": "2025-03-11", "story_count": 5},
    {"user_id": None, "story_date": "2025-03-12", "story_count": 3},
    {"user_id": "USER_003", "story_date": "2025-04-01", "story_count": 4},
    {"user_id": "user_004", "story_date": "2024-07-15", "story_count": 6},
    {"user_id": "user_004", "story_date": "2024-09-30", "story_count": 7},
    {"user_id": "user_004", "story_date": "2024/10/10", "story_count": 4},
    {"user_id": "user_004", "story_date": "2024-11-11", "story_count": 3},
    {"user_id": "user_004", "story_date": "2025-02-28", "story_count": 12},
    {"user_id": "user_004", "story_date": "2025-03-01", "story_count": 0},
    {"user_id": "user_005", "story_date": "2024-08-01", "story_count": 1},
    {"user_id": "user_005", "story_date": "2024-08-02", "story_count": 2},
    {"user_id": "user_005", "story_date": "2024-08-03", "story_count": 3},
    {"user_id": "user_005", "story_date": "2024-08-04", "story_count": 4},
    {"user_id": "user_005", "story_date": "2024-08-05", "story_count": None},
    {"user_id": "user_005", "story_date": "2024-08-06", "story_count": 5},
    {"user_id": "user_006", "story_date": "2024-09-01", "story_count": 9},
    {"user_id": "user_006", "story_date": "2024-09-02", "story_count": 10},
    {"user_id": "user_006", "story_date": "2024-09-03", "story_count": 9},
    {"user_id": "user_006", "story_date": "2024-09-04", "story_count": 50},
    {"user_id": "user_006", "story_date": "2024-09-05", "story_count": 8},
    {"user_id": "user_006", "story_date": None, "story_count": 7},
    {"user_id": "user_007", "story_date": "2024-10-10", "story_count": 4},
    {"user_id": "user_007", "story_date": "2024-10-11", "story_count": 4},
    {"user_id": "user_007", "story_date": "2024-10-12", "story_count": 4},
    {"user_id": "user_007", "story_date": "2024-10-13", "story_count": 3},
    {"user_id": "user_007", "story_date": "2024-10-14", "story_count": 2},
    {"user_id": "user_007", "story_date": "2024-10-15", "story_count": 1},
    {"user_id": "user_008", "story_date": "2025-01-01", "story_count": 11},
    {"user_id": "user_008", "story_date": "2025-01-02", "story_count": 12},
    {"user_id": "user_008", "story_date": "2025-01-03", "story_count": 13},
    {"user_id": "user_008", "story_date": "2025-01-04", "story_count": 14},
    {"user_id": "user_008", "story_date": "2025-01-05", "story_count": 15},
    {"user_id": "user_008", "story_date": "2025-01-06", "story_count": 0},
    {"user_id": "user_009", "story_date": "2024-12-01", "story_count": 1},
    {"user_id": "user_009", "story_date": "2024-12-02", "story_count": 2},
    {"user_id": "user_009", "story_date": "2024-12-03", "story_count": 3},
    {"user_id": "user_009", "story_date": "2024-12-04", "story_count": 4},
    {"user_id": "user_009", "story_date": "2024-12-05", "story_count": 5},
    {"user_id": "user_009", "story_date": "invalid_date", "story_count": 6},
    {"user_id": "user_010", "story_date": "2025-03-15", "story_count": 7},
    {"user_id": "user_010", "story_date": "2025-03-16", "story_count": 8},
    {"user_id": "user_010", "story_date": "2025-03-17", "story_count": 9},
    {"user_id": "user_010", "story_date": "2025-03-18", "story_count": 10},
    {"user_id": "user_010", "story_date": "2025-03-19", "story_count": 11},
    {"user_id": "user_010", "story_date": "2025-03-20", "story_count": 12},
]
stories_data = pd.DataFrame(stories_data_data)


## Question 1

Take a look at the data in the story_date column. Correct any data type inconsistencies in that column.

In [None]:
import pandas as pd

# Build the DataFrame exactly as shown
data = {
    "user_id": ["user_001","user_001","user_001","user_001","user_001","user_001",
                "user_002","user_002","user_002","user_002","user_002"],
    "story_date": ["2024-07-03","2024-07-03","2024-08-15","2024-09-10","2024-10-05","07/15/2024",
                   "2024-07-03","2024-07-04","", "2024-12-25","2025-01-15"],
    "story_count": [3,3,5,0,20,2,4,3,6,1,7]
}
df = pd.DataFrame(data)

# Every layout known to occur in the raw column, as (regex, strptime format).
# The patterns are mutually exclusive, so each value is parsed by at most one
# format and is never silently reinterpreted (e.g. day/month swapped).
STORY_DATE_FORMATS = (
    (r"^\d{4}-\d{2}-\d{2}$", "%Y-%m-%d"),       # YYYY-MM-DD
    (r"^\d{1,2}/\d{1,2}/\d{4}$", "%m/%d/%Y"),   # MM/DD/YYYY
    (r"^\d{4}/\d{1,2}/\d{1,2}$", "%Y/%m/%d"),   # YYYY/MM/DD
)


def parse_story_dates(raw):
    """Convert a column of mixed-format date strings to datetime64[ns].

    Parameters
    ----------
    raw : Series or list-like of str / missing values
        Raw ``story_date`` values. Surrounding whitespace is ignored.

    Returns
    -------
    pandas.Series
        ``datetime64[ns]`` values on the same index as ``raw``. Missing
        values, empty strings, text matching none of ``STORY_DATE_FORMATS``
        and impossible calendar dates all become ``NaT``.
    """
    # 1) Clean raw text (preserve missing as NA)
    text = pd.Series(raw, dtype="string").str.strip().replace("", pd.NA)

    # 2) Start from all-NaT and fill in only what a known format can parse
    parsed = pd.Series(pd.NaT, index=text.index, dtype="datetime64[ns]")
    for pattern, fmt in STORY_DATE_FORMATS:
        mask = text.str.match(pattern, na=False)
        if mask.any():
            parsed.loc[mask] = pd.to_datetime(text.loc[mask], format=fmt, errors="coerce")
    return parsed


# 3) Assign back
df["story_date"] = parse_story_dates(df["story_date"])

# (Optional) check dtype/result
print(df)
# print(df["story_date"].dtype)  # -> datetime64[ns]

## Question 2

Calculate the 25th, 50th, and 75th percentiles of the number of stories created per user per day.

In [None]:
import pandas as pd

# Assuming df is your cleaned dataframe (story_date already converted to
# datetime, with unparseable or missing dates stored as NaT).
# Rows without a usable date cannot be attributed to a day, so drop them.
df = df.dropna(subset=['story_date'])

# Each (user, day) should contribute one observation. Rows that repeat an
# earlier row exactly are presumably duplicate log entries; keeping them
# would count that user-day twice and skew the percentiles.
df = df.drop_duplicates()

# Calculate percentiles for story_count (quantile skips missing counts)
percentiles = df['story_count'].quantile([0.25, 0.50, 0.75])

print(percentiles)

## Question 3

What percentage of users have had at least one day on which they posted more than 10 stories?

In [None]:
import pandas as pd

# Sample DataFrame (replace with your actual data)
data = {
    "user_id": ["user_001","user_001","user_001","user_001","user_001","user_001",
                "user_002","user_002","user_002","user_002","user_002"],
    "story_date": ["2024-07-03","2024-07-03","2024-08-15","2024-09-10","2024-10-05","07/15/2024",
                   "2024-07-03","2024-07-04","", "2024-12-25","2025-01-15"],
    "story_count": [3,3,5,0,20,2,4,3,6,1,7]
}

df = pd.DataFrame(data)

# Step 1: Clean story_date (convert to datetime).
# The column mixes several layouts. Letting pandas infer one format for the
# whole column (the deprecated infer_datetime_format=True) turns every value
# in a different layout into NaT on pandas >= 2.0, so each known layout is
# parsed explicitly and the results are merged.
raw_dates = df["story_date"].astype("string").str.strip()
parsed_dates = pd.to_datetime(raw_dates, format="%Y-%m-%d", errors="coerce")
for fallback_format in ("%m/%d/%Y", "%Y/%m/%d"):
    parsed_dates = parsed_dates.fillna(
        pd.to_datetime(raw_dates, format=fallback_format, errors="coerce")
    )
df["story_date"] = parsed_dates

# Step 2: Normalise user ids so that whitespace/case variants of the same id
# are not counted as separate users.
df["user_id"] = df["user_id"].astype("string").str.strip().str.lower()

# Step 3: Drop rows with missing dates or missing user ids
df_valid = df.dropna(subset=["story_date", "user_id"])

# Step 4: Find users who ever posted more than 10 stories in a day
users_over_10 = df_valid.loc[df_valid["story_count"] > 10, "user_id"].unique()

# Step 5: Total number of unique users
total_users = df_valid["user_id"].nunique()

# Step 6: Calculate percentage (0% when there are no valid users at all,
# rather than failing with a division by zero)
percentage = (len(users_over_10) / total_users) * 100 if total_users else 0.0

print(f"{percentage:.2f}% of users had at least one day with more than 10 stories")

Made with ❤️ by [Interview Master](https://www.interviewmaster.ai)