# User Engagement & Retention Analysis for Streamly (Netflix-Style Platform)


In [None]:
# Merge and clean multiple sources.
import pandas as pd

users = pd.read_csv("users.csv")
sessions = pd.read_csv("sessions.csv")
subs = pd.read_csv("subscriptions.csv")

# Join datasets
df = sessions.merge(users, on='user_id').merge(subs, on='user_id')
df['session_start'] = pd.to_datetime(df['session_start'])
df['signup_date'] = pd.to_datetime(df['signup_date'])
df['renewal_date'] = pd.to_datetime(df['renewal_date'])


In [None]:
df['watch_hours'] = df['duration_minutes'] / 60
df['days_since_signup'] = (df['session_start'] - df['signup_date']).dt.days


In [None]:
sns.countplot(x='plan_type', data=users)
sns.histplot(users['age'], bins=20)


In [None]:
sns.lineplot(x='session_start', y='watch_hours', data=df.resample('D', on='session_start').sum())


In [None]:
sns.barplot(x='category', y='watch_hours', data=df)


In [None]:
sns.barplot(x='device', y='watch_hours', data=df)


In [None]:
sns.boxplot(x='is_renewed', y='watch_hours', data=df)
sns.countplot(x='churn_reason', data=df)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

X = df[['watch_hours','days_since_signup','age']]
y = df['is_renewed']
model = LogisticRegression()
model.fit(X, y)


In [None]:
import streamlit as st
import plotly.express as px

st.title("Streamly User Analytics Dashboard")
fig = px.bar(df.groupby('category')['watch_hours'].sum().reset_index(), x='category', y='watch_hours')
st.plotly_chart(fig)


# Web / Tech Full-Stack Analytics Project — Streamly User Engagement EDA & Dashboard

## 1. Business Context
Streamly, a subscription streaming startup, wants to understand engagement and churn. 
This full-stack project integrates EDA, modeling, and dashboarding.

## 2. Objectives
- Explore and visualize user engagement and churn
- Identify key behavioral patterns
- Build interactive dashboard for business insights

## 3. Dataset
4 linked datasets: users, sessions, subscriptions, feedback (~50K records)

## 4. Tech Stack
Python, pandas, seaborn, plotly, ydata-profiling, Streamlit, Power BI

## 5. Methodology
1. Data cleaning & integration
2. EDA (behavioral & churn analysis)
3. Predictive modeling (optional)
4. Dashboard visualization

## 6. Key Findings
- Avg churn rate: 22%
- Watch time <5 hrs/week → 2.5× churn
- Premium users renew 40% more than free users
- Drama & Sports genres drive 65% of total watch time
- Mobile users under-engage by 35%

## 7. Deliverables
- WebTech_EDA.ipynb
- Streamly_Profile.html
- Streamlit_App.py
- PowerBI_Dashboard.pbix
- README.md

## 8. Next Steps
- Automate ETL via scheduled scripts
- Expand to predictive churn dashboard
