In [7]:
import folium
from folium.plugins import MarkerCluster
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import geopandas as gpd
import matplotlib.pyplot as plt
from ipywidgets import widgets, Output
from folium.plugins import TimeSliderChoropleth
from IPython.display import display
import numpy as np
import panel as pn
import hvplot.pandas
import plotly.express as px

# Initialize Panel with extensions
pn.extension('plotly', design='material')

In [10]:
url = 'https://raw.githubusercontent.com/TheNewtonsRing/Global-YouTube-Statistics-2023/main/Global%20YouTube%20Statistics.csv'

In [11]:

# Preprocessing
def get_data_fromurl(url):
    df = pd.read_csv(url, encoding='latin1')
    # Display the first few rows of the DataFrame
    df.to_csv('global_youtube_statistics.csv', index=False)
    missing_values = df.isna().sum()
    df_interpolated = df.interpolate(method='linear')
    categorical_columns = ['category', 'Country', 'Abbreviation', 'channel_type']
    for column in categorical_columns:
        df_interpolated[column].fillna(method='ffill', inplace=True)

    df_interpolated['created_month'].fillna(method='bfill', inplace=True)
    df = df.dropna(subset=['Latitude', 'Longitude'])
    df.to_csv('global_youtube_statistics.csv', index=False)
    df = pd.read_csv('global_youtube_statistics.csv')
    return df

In [13]:
df =  get_data_fromurl(url)
df.head()

Unnamed: 0,rank,Youtuber,subscribers,video views,category,Title,uploads,Country,Abbreviation,channel_type,...,subscribers_for_last_30_days,created_year,created_month,created_date,Gross tertiary education enrollment (%),Population,Unemployment rate,Urban_population,Latitude,Longitude
0,1,T-Series,245000000,228000000000.0,Music,T-Series,20082,India,IN,Music,...,2000000.0,2006.0,Mar,13.0,28.1,1366418000.0,5.36,471031528.0,20.593684,78.96288
1,2,YouTube Movies,170000000,0.0,Film & Animation,youtubemovies,1,United States,US,Games,...,,2006.0,Mar,5.0,88.2,328239500.0,14.7,270663028.0,37.09024,-95.712891
2,3,MrBeast,166000000,28368840000.0,Entertainment,MrBeast,741,United States,US,Entertainment,...,8000000.0,2012.0,Feb,20.0,88.2,328239500.0,14.7,270663028.0,37.09024,-95.712891
3,4,Cocomelon - Nursery Rhymes,162000000,164000000000.0,Education,Cocomelon - Nursery Rhymes,966,United States,US,Education,...,1000000.0,2006.0,Sep,1.0,88.2,328239500.0,14.7,270663028.0,37.09024,-95.712891
4,5,SET India,159000000,148000000000.0,Shows,SET India,116536,India,IN,Entertainment,...,1000000.0,2006.0,Sep,20.0,28.1,1366418000.0,5.36,471031528.0,20.593684,78.96288


In [29]:
m = folium.Map(location=[df['Latitude'].mean(), df['Longitude'].mean()], zoom_start=2, control_scale=True)
# Add tiled basemaps with different options (you can choose any of these or add more)
folium.TileLayer('openstreetmap').add_to(m)
folium.TileLayer('cartodbpositron').add_to(m)
folium.TileLayer('stamenterrain').add_to(m)
# Create a MarkerCluster layer for the markers
marker_cluster = MarkerCluster().add_to(m)

# Add markers for each YouTuber
for i, row in df.iterrows():
    popup_html = f"<b>Youtuber:</b> {row['Youtuber']}<br>" \
                 f"<b>Country:</b> {row['Country']}<br>" \
                 f"<b>Video Views:</b> {row['video views']}<br>" \
                 f"<b>Lowest Monthly Earnings:</b> {row['lowest_monthly_earnings']}<br>" \
                 f"<b>Highest Monthly Earnings:</b> {row['highest_monthly_earnings']}"
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup_html,
        icon=folium.Icon(color='blue'),  # Set marker color
    ).add_to(marker_cluster)

# Add Layer Control to toggle basemaps
folium.LayerControl().add_to(m)
folium.map.Marker(
    [df['Latitude'].mean(), df['Longitude'].mean()],
    icon=folium.DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='',
    )
).add_to(m)
# Add title to the map
title_html = """
             <h3 align="center" style="font-size:16px"><b>YouTube Channels Worldwide Map</b></h3>
             """
m.get_root().html.add_child(folium.Element(title_html))

# Create a dropdown widget for YouTuber search
youtuber_dropdown = widgets.Dropdown(
    options=df['Youtuber'].tolist(),
    description='Search :',
    disabled=False,
)
display(youtuber_dropdown)

# Create an output widget to display the annual income
output_annual_income = Output()
def search_annual_income(b):
    selected_youtuber = youtuber_dropdown.value
    with output_annual_income:
        output_annual_income.clear_output()
        # Filter the DataFrame for the selected YouTuber
        youtuber_data = df[df['Youtuber'] == selected_youtuber]
        if not youtuber_data.empty:
            annual_income = (youtuber_data['lowest_monthly_earnings'] + youtuber_data['highest_monthly_earnings']) / 2 * 12
            print(f"Annual Income for {selected_youtuber}: ${annual_income.values[0]:,.2f}")
        else:
            print("No data available for the selected YouTuber.")

# Create a button widget for searching annual income
search_button = widgets.Button(description='Annual Income')
search_button.on_click(search_annual_income)
display(search_button)

# Display the output widget for annual income
display(output_annual_income)

# Display the map
display(m)


Dropdown(description='Search :', options=('T-Series', 'YouTube Movies', 'MrBeast', 'Cocomelon - Nursery Rhymes…

Button(description='Annual Income', style=ButtonStyle())

Output()