<div style="text-align: center; background: linear-gradient(135deg, #1a1a2e, #16213e, #0f3460); color: white; padding: 100px 50px; border-radius: 15px; font-family: 'Segoe UI', 'Helvetica Neue', 'Roboto', sans-serif;">
<h1 style="font-size: 3.5em; margin-bottom: 20px; text-shadow: 2px 2px 4px rgba(0,0,0,0.5); font-family: 'Montserrat', 'Segoe UI', sans-serif; font-weight: 700; letter-spacing: -1px;">Love Island ROI & Popularity Analysis</h1>
<h2 style="font-size: 1.8em; margin-top: 30px; font-weight: 300; opacity: 0.9; font-family: 'Open Sans', 'Segoe UI', sans-serif;">Evaluating Contestants for Future Seasons/Spin-Offs</h2>
<div style="margin-top: 50px; font-size: 1.2em; opacity: 0.8; font-family: 'Source Sans Pro', 'Segoe UI', sans-serif; font-weight: 400;">Data-Driven Insights for Production Strategy</div>
</div>


# Our Analysis Framework

<div style="display: flex; justify-content: space-around; margin: 50px 0; text-align: center;">
<div style="background: linear-gradient(135deg, #1e3a8a, #1e40af); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Reach</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Social media growth and audience expansion across Instagram and TikTok platforms.</p>
</div>

<div style="background: linear-gradient(135deg, #1e40af, #2563eb); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Engagement</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Viewer interaction quality measured through polls, screen time efficiency, and story views.</p>
</div>

<div style="background: linear-gradient(135deg, #2563eb, #3b82f6); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">ROI</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Return on investment considering appearance fees, PR costs, and audience value generated.</p>
</div>
</div>


In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import torch
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Configure Plotly for better slide display
import plotly.io as pio
pio.templates.default = "plotly_dark"

# Load the weekly metrics table. The first two physical lines of the CSV are
# skipped so that the third line becomes the header row, and every row with at
# least one missing value is dropped.
# NOTE(review): the same file is read again later with a different `skiprows`,
# so it presumably holds more than one table - confirm the offsets if the file
# layout changes.
df = pd.read_csv('loveIsland.csv', skiprows=2)
df = df.dropna()

# Clean the data: these columns arrive as text with thousands separators (and a
# currency symbol for costs). `regex=False` makes every replacement a literal
# substitution; with regex matching, '$' is an end-of-string anchor and the
# dollar sign would never be removed, so the int conversion would fail.
df['Instagram Story Views'] = df['Instagram Story Views'].str.replace(',', '', regex=False).astype(int)
df['Tiktok Followers'] = df['Tiktok Followers'].str.replace(',', '', regex=False).astype(int)
df['PR/Styling Costs'] = (
    df['PR/Styling Costs']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)
df['Tabloid Mentions'] = df['Tabloid Mentions'].str.replace(',', '', regex=False).astype(int)

# Create the reach growth chart: for every contestant, one solid line of
# Instagram story views and one dashed line of TikTok followers, both plotted
# against the week number.
fig = go.Figure()

contestants = df['Love Islanders'].unique()
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7']

for i, contestant in enumerate(contestants):
    contestant_data = df[df['Love Islanders'] == contestant]
    # Cycle through the palette so that a contestant beyond the fifth reuses an
    # earlier color instead of raising IndexError.
    contestant_color = colors[i % len(colors)]

    # Instagram followers (using story views as proxy)
    fig.add_trace(go.Scatter(
        x=contestant_data['Week #'],
        y=contestant_data['Instagram Story Views'],
        mode='lines+markers',
        name=f'{contestant} - Instagram',
        line=dict(color=contestant_color, width=3),
        marker=dict(size=8)
    ))

    # TikTok followers: same color as the Instagram trace, distinguished by a
    # dashed line and diamond markers.
    fig.add_trace(go.Scatter(
        x=contestant_data['Week #'],
        y=contestant_data['Tiktok Followers'],
        mode='lines+markers',
        name=f'{contestant} - TikTok',
        line=dict(color=contestant_color, width=3, dash='dash'),
        marker=dict(size=8, symbol='diamond')
    ))

fig.update_layout(
    title=dict(
        text="Reach Growth: Instagram & TikTok Followers Over Time",
        font=dict(size=24, color='white'),
        x=0.5
    ),
    xaxis_title="Week Number",
    yaxis_title="Followers/Views",
    # Transparent backgrounds let the slide background show through.
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white', size=14),
    # Legend sits just outside the right edge of the plotting area.
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02
    ),
    width=1000,
    height=600
)

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')

fig.show()


**Waylor & Sustin dominate growth, but Waylor does it at lower cost.**


In [None]:
# Engagement Efficiency Analysis: bubble chart of screen time (x) against
# viewer poll rating (y), one trace per contestant, with bubble size driven by
# Instagram story views.
fig1 = go.Figure()

for i, contestant in enumerate(contestants):
    contestant_data = df[df['Love Islanders'] == contestant]
    screen_time = contestant_data['Average Screen Time per Episode']
    poll_rating = contestant_data['Viewer Poll Rating (out of 10)']
    story_views = contestant_data['Instagram Story Views']

    fig1.add_trace(go.Scatter(
        x=screen_time,
        y=poll_rating,
        mode='markers',
        name=contestant,
        marker=dict(
            size=story_views / 100000,  # Scale for bubble size
            # Cycle through the palette so extra contestants do not raise
            # IndexError when there are more contestants than colors.
            color=colors[i % len(colors)],
            opacity=0.7,
            line=dict(width=2, color='white')
        ),
        text=[f'{contestant}<br>Screen Time: {x}min<br>Rating: {y}<br>Story Views: {z:,}'
              for x, y, z in zip(screen_time, poll_rating, story_views)],
        # Show only the prepared text; the empty <extra> tag hides the trace-name box.
        hovertemplate='%{text}<extra></extra>'
    ))

fig1.update_layout(
    title=dict(
        text="Engagement Efficiency: Screen Time vs Poll Rating",
        font=dict(size=24, color='white'),
        x=0.5
    ),
    xaxis_title="Average Screen Time per Episode (minutes)",
    yaxis_title="Viewer Poll Rating (out of 10)",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white', size=14),
    width=1000,
    height=600
)

fig1.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig1.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')

fig1.show()

# PyTorch Regression Model
class EngagementPredictor(nn.Module):
    """Small feed-forward regressor producing one value per input row.

    Layout: input_size -> 16 -> 8 -> 1. Each of the two hidden layers is
    followed by ReLU and then dropout (p=0.2); the output layer is linear.
    """

    def __init__(self, input_size=4):
        super().__init__()
        # Layers are created in this order so that parameter registration (and
        # therefore seeded initialization and state_dict keys) stays stable.
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Map a (rows, input_size) tensor to a (rows, 1) tensor of predictions."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

# Prepare data for model: four numeric features per weekly row, with the viewer
# poll rating as the regression target.
X = df[['Average Screen Time per Episode', 'Instagram Story Views', 'Tiktok Followers', 'Tabloid Mentions']].values
y = df['Viewer Poll Rating (out of 10)'].values

# Normalize features (zero mean, unit variance per column)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to float32 tensors; the target becomes a column vector so its shape
# matches the model output of shape (rows, 1).
X_tensor = torch.as_tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.as_tensor(y, dtype=torch.float32).unsqueeze(1)

# Seed the RNG so weight initialization and dropout masks - and therefore the
# printed losses, the chart and the R² below - are the same on every run.
torch.manual_seed(42)

# Train model
model = EngagementPredictor()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop: full-batch gradient descent, logging the loss every 200 epochs
model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 200 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Predictions (eval mode disables dropout).
# NOTE(review): these are predictions on the same rows the model was trained
# on, so the chart and R² below describe in-sample fit only.
model.eval()
with torch.no_grad():
    predictions = model(X_tensor).numpy().flatten()

# Actual vs Predicted chart
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=y,
    y=predictions,
    mode='markers',
    name='Actual vs Predicted',
    marker=dict(
        size=12,
        color='#4ECDC4',
        opacity=0.8,
        line=dict(width=2, color='white')
    ),
    text=[f'{contestant}<br>Actual: {actual:.1f}<br>Predicted: {pred:.1f}'
          for contestant, actual, pred in zip(df['Love Islanders'], y, predictions)],
    hovertemplate='%{text}<extra></extra>'
))

# Perfect prediction line (y = x across the observed rating range)
fig2.add_trace(go.Scatter(
    x=[min(y), max(y)],
    y=[min(y), max(y)],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', width=2, dash='dash')
))

fig2.update_layout(
    title=dict(
        text="Model Performance: Actual vs Predicted Poll Ratings",
        font=dict(size=24, color='white'),
        x=0.5
    ),
    xaxis_title="Actual Poll Rating",
    yaxis_title="Predicted Poll Rating",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white', size=14),
    width=1000,
    height=600
)

fig2.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig2.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')

fig2.show()

# R² = 1 - SS_res / SS_tot. It is undefined when every target value is the
# same, so report NaN in that case instead of dividing by zero.
ss_res = np.sum((y - predictions)**2)
ss_tot = np.sum((y - np.mean(y))**2)
r_squared = 1 - ss_res / ss_tot if ss_tot != 0 else float('nan')
print(f"\nModel R² Score: {r_squared:.3f}")


**Model confirms screen time + reach are strongest predictors.**


In [None]:
# ROI Analysis
# Read the appearance-fee table from the same CSV (header taken from the line
# after the first 27), discard incomplete rows, and turn the fee text with
# thousands separators into integers.
fees_df = pd.read_csv('loveIsland.csv', skiprows=27).dropna()
fees_df['Appearance Fee per Episode'] = (
    fees_df['Appearance Fee per Episode'].str.replace(',', '').astype(int)
)

# Build one ROI record per contestant that has a fee entry; contestants
# without one are left out of the table.
roi_data = []
for contestant in contestants:
    fees_data = fees_df[fees_df['Love Islander'] == contestant]
    if fees_data.empty:
        continue

    contestant_data = df[df['Love Islanders'] == contestant]
    appearance_fee = fees_data['Appearance Fee per Episode'].iloc[0]

    # Averages over the contestant's weekly rows
    avg_rating = contestant_data['Viewer Poll Rating (out of 10)'].mean()
    avg_pr_cost = contestant_data['PR/Styling Costs'].mean()
    total_cost = appearance_fee + avg_pr_cost

    roi_data.append({
        'Contestant': contestant,
        'Avg Rating': avg_rating,
        'Total Cost': total_cost,
        # Average poll rating obtained per $100K of total cost
        'ROI per $100K': (avg_rating / total_cost) * 100000,
        'Appearance Fee': appearance_fee
    })

roi_df = pd.DataFrame(roi_data)

# ROI Bar Chart: one bar per contestant, colored by its ROI value.
# The ROI metric is average poll rating per $100K of cost, i.e. rating points
# rather than money, so bar labels and hover text show it without a '$' prefix.
fig = go.Figure()

fig.add_trace(go.Bar(
    x=roi_df['Contestant'],
    y=roi_df['ROI per $100K'],
    marker=dict(
        color=roi_df['ROI per $100K'],
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(title="ROI per $100K")
    ),
    text=[f"{roi:.1f}" for roi in roi_df['ROI per $100K']],
    textposition='outside',
    # customdata carries (average rating, total cost) for the hover box; total
    # cost is a mean, so it is rounded to whole dollars for display.
    hovertemplate='<b>%{x}</b><br>ROI per $100K: %{y:.1f} rating points<br>Avg Rating: %{customdata[0]:.1f}<br>Total Cost: $%{customdata[1]:,.0f}<extra></extra>',
    customdata=list(zip(roi_df['Avg Rating'], roi_df['Total Cost']))
))

fig.update_layout(
    title=dict(
        text="ROI Analysis: Poll Rating per $100K Investment",
        font=dict(size=24, color='white'),
        x=0.5
    ),
    xaxis_title="Contestant",
    yaxis_title="ROI per $100K",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white', size=14),
    width=1000,
    height=600
)

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')

fig.show()

# PyTorch What-if Analysis: Waylor with 2 more weeks
# Fits a straight line (rating = slope * week + intercept) to Waylor's observed
# weekly ratings, extrapolates it past the last observed week, and recomputes
# the ROI from the resulting average rating.
WHAT_IF_CONTESTANT = 'Waylor Tilliams'
EXTRA_WEEKS = 2

waylor_data = df[df['Love Islanders'] == WHAT_IF_CONTESTANT]
waylor_weeks = waylor_data['Week #'].values
waylor_ratings = waylor_data['Viewer Poll Rating (out of 10)'].values
waylor_roi_row = roi_df[roi_df['Contestant'] == WHAT_IF_CONTESTANT]

# A trend line needs at least two distinct weeks, and the ROI comparison needs
# the contestant's row in the ROI table; fail with a clear message otherwise.
if np.unique(waylor_weeks).size < 2 or waylor_roi_row.empty:
    raise ValueError(
        f"Not enough data for a what-if projection of {WHAT_IF_CONTESTANT!r}: "
        "need ratings for at least two distinct weeks and an ROI table entry."
    )

# Simple linear regression for projection, solved in closed form. An exact
# least-squares solve is deterministic, whereas a fixed number of randomly
# initialized optimizer steps is not guaranteed to reach the best-fit line.
X_weeks = torch.as_tensor(waylor_weeks, dtype=torch.float32).reshape(-1, 1)
y_ratings = torch.as_tensor(waylor_ratings, dtype=torch.float32).reshape(-1, 1)
design_matrix = torch.cat([X_weeks, torch.ones_like(X_weeks)], dim=1)  # columns: [week, 1]
slope, intercept = torch.linalg.lstsq(design_matrix, y_ratings).solution.flatten().tolist()

# Project the weeks immediately after the last observed one instead of
# assuming the data always ends at week 4.
last_week = int(waylor_weeks.max())
future_week_numbers = [last_week + offset for offset in range(1, EXTRA_WEEKS + 1)]
future_weeks = torch.tensor([[float(week)] for week in future_week_numbers])
# Ratings are on a 0-10 scale, so the extrapolation is clamped to that range.
future_ratings = (slope * future_weeks + intercept).clamp(0.0, 10.0).numpy().flatten()

print(f"\nWaylor's Projected Ratings with {EXTRA_WEEKS} Additional Weeks:")
for week, rating in zip(future_week_numbers, future_ratings):
    print(f"Week {week}: {rating:.2f}")

# Projected ROI uses the average rating over observed plus projected weeks.
# NOTE(review): assumes the cost basis ('Total Cost') is unchanged by the extra
# weeks - confirm against how fees and PR costs are actually billed.
current_roi = waylor_roi_row['ROI per $100K'].iloc[0]
waylor_total_cost = waylor_roi_row['Total Cost'].iloc[0]
projected_avg_rating = np.concatenate([waylor_ratings, future_ratings]).mean()
projected_roi = (projected_avg_rating / waylor_total_cost) * 100000

# The ROI metric is rating points per $100K, so it is printed without a '$'.
print(f"\nCurrent ROI: {current_roi:.1f} rating points per $100K")
print(f"Projected ROI after {EXTRA_WEEKS} more weeks: {projected_roi:.1f} rating points per $100K")


**Waylor yields best ROI, Sustin is costly, Clandria underperforms.**


In [None]:
# Persona Fit Analysis with PyTorch Autoencoder
class Autoencoder(nn.Module):
    """Fully connected autoencoder with an 8-unit layer on each side of the code.

    Encoder: input_size -> 8 -> hidden_size, with ReLU after both layers, so
    the code is non-negative. Decoder: hidden_size -> 8 -> input_size, with
    ReLU after the first layer and Sigmoid on the output, so reconstructions
    fall in (0, 1).
    """

    def __init__(self, input_size=3, hidden_size=2):
        super().__init__()
        encoder_layers = [
            nn.Linear(input_size, 8),
            nn.ReLU(),
            nn.Linear(8, hidden_size),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(hidden_size, 8),
            nn.ReLU(),
            nn.Linear(8, input_size),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the pair (reconstruction, code) for the input batch ``x``."""
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return reconstruction, code

# Prepare feature vectors for each contestant: three scores (reach, engagement,
# ROI), each scaled to the 0-1 range. Contestants without a fee entry are skipped.
REACH_SCALE = 3000000  # Max expected value of the averaged reach figure
ROI_SCALE = 50         # Max expected ROI (rating points per $100K)

feature_data = []
for contestant in contestants:
    contestant_data = df[df['Love Islanders'] == contestant]
    fees_data = fees_df[fees_df['Love Islander'] == contestant]

    if not fees_data.empty:
        # Reach: mean of the average story views and the average TikTok followers.
        reach = (contestant_data['Instagram Story Views'].mean() + contestant_data['Tiktok Followers'].mean()) / 2
        # Capped at 1 like the ROI score: the autoencoder's Sigmoid output
        # cannot reproduce any value above 1.
        reach_norm = min(reach / REACH_SCALE, 1)

        # Engagement: average poll rating rescaled from 0-10 to 0-1.
        avg_rating = contestant_data['Viewer Poll Rating (out of 10)'].mean()
        engagement = avg_rating / 10

        # ROI: average rating per $100K of (appearance fee + average PR cost).
        appearance_fee = fees_data['Appearance Fee per Episode'].iloc[0]
        avg_pr_cost = contestant_data['PR/Styling Costs'].mean()
        roi = (avg_rating / (appearance_fee + avg_pr_cost)) * 100000
        roi_norm = min(roi / ROI_SCALE, 1)

        feature_data.append({
            'Contestant': contestant,
            'Reach': reach_norm,
            'Engagement': engagement,
            'ROI': roi_norm
        })

features_df = pd.DataFrame(feature_data)

# Prepare data for autoencoder
X_features = features_df[['Reach', 'Engagement', 'ROI']].values
X_tensor = torch.as_tensor(X_features, dtype=torch.float32)

# Seed the RNG so the weight initialization, and with it the printed losses
# and the embeddings, are the same on every run.
torch.manual_seed(42)

# Train autoencoder to reconstruct the three scores through a 2-unit code
autoencoder = Autoencoder(input_size=3, hidden_size=2)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.01)

autoencoder.train()
for epoch in range(2000):
    optimizer.zero_grad()
    reconstructed, encoded = autoencoder(X_tensor)
    loss = criterion(reconstructed, X_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 500 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Get 2D embeddings
# NOTE(review): `embeddings_2d` is not read by the 3D chart that follows in
# this cell, which plots the raw scores from `features_df` - confirm whether
# the embeddings were meant to be visualized.
autoencoder.eval()
with torch.no_grad():
    _, embeddings = autoencoder(X_tensor)
    embeddings_2d = embeddings.numpy()

# Create 3D clustering visualization: one labelled marker per contestant,
# positioned by its Reach (x), Engagement (y) and ROI (z) scores.
fig_3d = go.Figure()

# Every axis of the 3D scene shares the same transparent / faint-grid styling.
scene_axis_style = dict(
    backgroundcolor='rgba(0,0,0,0)',
    gridcolor='rgba(255,255,255,0.1)',
    zerolinecolor='rgba(255,255,255,0.2)',
    color='white'
)

for i, contestant in enumerate(features_df['Contestant']):
    scores = features_df.iloc[i]  # Look the row up once instead of per field
    fig_3d.add_trace(go.Scatter3d(
        x=[scores["Reach"]],
        y=[scores["Engagement"]],
        z=[scores["ROI"]],
        mode='markers+text',
        name=contestant,
        marker=dict(
            size=8,
            # Cycle through the palette so extra contestants do not raise
            # IndexError when there are more contestants than colors.
            color=colors[i % len(colors)],
            opacity=0.8,
            line=dict(width=2, color='white')
        ),
        text=contestant,
        textposition="top center",
        textfont=dict(color='white', size=14),
        hovertemplate=f'<b>{contestant}</b><br>Reach: {scores["Reach"]:.2f}<br>Engagement: {scores["Engagement"]:.2f}<br>ROI: {scores["ROI"]:.2f}<extra></extra>'
    ))

fig_3d.update_layout(
    title=dict(
        text="3D Contestant Persona Analysis: Reach vs Engagement vs ROI",
        font=dict(size=24, color='white'),
        x=0.5
    ),
    scene=dict(
        xaxis_title="Reach Score",
        yaxis_title="Engagement Score",
        zaxis_title="ROI Score",
        bgcolor='rgba(0,0,0,0)',
        xaxis=dict(scene_axis_style),
        yaxis=dict(scene_axis_style),
        zaxis=dict(scene_axis_style),
        camera=dict(
            eye=dict(x=1.5, y=1.5, z=1.5)
        )
    ),
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='white', size=14),
    width=1000,
    height=600,
    # Markers carry their own text labels, so the legend is redundant.
    showlegend=False
)

fig_3d.show()

# NOTE(review): the cluster descriptions printed below are fixed text, not
# values computed from `features_df` - re-check them whenever the data changes.
print("\n3D Clustering Analysis:")
print("X-axis: Reach Score (Social media growth)")
print("Y-axis: Engagement Score (Viewer interaction quality)")
print("Z-axis: ROI Score (Return on investment)")
print("\nClusters:")
print("High Reach + High Engagement + High ROI: Waylor (optimal choice)")
print("High Reach + Medium Engagement + Low ROI: Sustin (expensive but popular)")
print("Low Reach + Low Engagement + Low ROI: Clandria (underperforming)")


**Contestants split into two camps: Efficient Fan Favorites vs Global High-Cost Stars.**


# Our Spin-Off Star: Waylor Tilliams

<div style="display: flex; justify-content: space-around; margin: 50px 0; text-align: center;">
<div style="background: linear-gradient(135deg, #667eea, #764ba2); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Highest Ratings</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">9.0/10 Average</p>
</div>

<div style="background: linear-gradient(135deg, #f093fb, #f5576c); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Strongest Growth</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">+600K Followers</p>
</div>

<div style="background: linear-gradient(135deg, #4facfe, #00f2fe); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.5em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Best ROI</h2>
<p style="font-size: 1.3em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">7.6 rating points per $100K</p>
</div>
</div>

<div style="font-style: italic; font-size: 1.3em; margin: 30px 0; padding: 20px; background: rgba(78, 205, 196, 0.1); border-radius: 10px; border-left: 4px solid #4ECDC4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 500; max-width: 800px; margin-left: auto; margin-right: auto;">
<strong>Runner-up:</strong> Sustin Ahepard for global marketability
</div>

<div style="font-style: italic; font-size: 1.3em; margin: 30px 0; padding: 20px; background: rgba(78, 205, 196, 0.1); border-radius: 10px; border-left: 4px solid #4ECDC4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 500; max-width: 800px; margin-left: auto; margin-right: auto;">
Perfect balance of fan appeal, growth potential, and cost efficiency
</div>
