## Drag Queen Migration Map

The purpose of this project is to map the hometowns of RuPaul's Drag Race contestants and the places they moved to after being on the show. The goal is to find cultural hubs where drag queens cluster. The first section of code shows how I obtained my data, by scraping information from a Drag Race wiki page. After I manually added XY data for each queen, the next section maps the queens and draws lines for their migration routes. Then, in the last section, I try to make this map interactive.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [None]:
# Root of the wiki; the category path and the relative profile links are
# appended to it.
base_url = "https://rupaulsdragrace.fandom.com"

In [None]:
# Fetch and parse the category page that lists the contestant profiles.
category_url = base_url + "/wiki/Category:RuPaul%27s_Drag_Race_Contestants"
# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(category_url, timeout=30)
# Fail loudly on an HTTP error; parsing an error page would otherwise just
# produce an empty list of contestant links further down.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [None]:
# Anchor elements for every member listed on the category page, and the
# list that will receive one record per contestant.
contestant_links = soup.select(".category-page__member-link")
contestant_data = []

In [None]:
# Visit each contestant profile and read the "Hometown" and "Location" rows
# from the page's infobox, appending one record per contestant.
for link in contestant_links:
    name = link.text.strip()
    profile_url = base_url + link["href"]

    # Fetch the contestant's profile page. The timeout stops a single
    # stalled request from hanging the whole scrape.
    profile_response = requests.get(profile_url, timeout=30)
    profile_soup = BeautifulSoup(profile_response.text, "html.parser")

    # Defaults kept when the infobox has no matching row.
    hometown = "Not Found"
    location = "Not Found"

    # Every data row of the infobox.
    info_boxes = profile_soup.select(".pi-item.pi-data")

    for box in info_boxes:
        label = box.find(class_="pi-data-label")
        value = box.find(class_="pi-data-value")

        # Skip rows that lack either the label or the value element.
        if not (label and value):
            continue

        if "Hometown" in label.text:
            hometown = value.text.strip()
        elif "Location" in label.text:
            location = value.text.strip()

    # Store data
    contestant_data.append({"Name": name, "Hometown": hometown, "Location": location})

    # Pause between requests to avoid overwhelming the server.
    time.sleep(1)

In [None]:
# Tabulate the scraped records and print them for a quick look.
df = pd.DataFrame(data=contestant_data)
print(df)

In [None]:
# Rebuild the table, keeping only the first copy of any fully identical row.
df = pd.DataFrame(data=contestant_data).drop_duplicates(keep="first")


In [None]:
# Write the scraped table to disk; index=False leaves the row index out of
# the file.
df.to_csv("drag_race_contestants2.csv", index=False)


# Start HERE

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import LineString, Point
import matplotlib.pyplot as plt
import contextily


In [None]:
# Load the CSV into a DataFrame. The cells below read the 'Hcords', 'Lcords'
# and 'OmitME' columns from it.
# NOTE(review): the file is decoded as ISO-8859-1 -- confirm this matches how
# the edited CSV was saved, otherwise accented names will come out garbled.
df = pd.read_csv("drag_race_contestants2.csv", encoding="ISO-8859-1")
df.head()

In [None]:
# 'Hcords' and 'Lcords' each hold a comma-separated pair; split each pair
# into two float columns (hometown and current location respectively).
df[['home_lat', 'home_lon']] = (
    df['Hcords'].str.split(pat=',', expand=True).astype(float)
)
df[['curr_lat', 'curr_lon']] = (
    df['Lcords'].str.split(pat=',', expand=True).astype(float)
)

In [None]:
# Keep only the rows whose 'OmitME' cell is empty; any value in that column
# excludes the row.
# .copy() makes the result an independent frame, so the column assignments
# in later cells do not trigger pandas' SettingWithCopyWarning.
df = df[df["OmitME"].isna()].copy()

In [None]:
# One two-point LineString per row, running from the hometown coordinates
# to the current-location coordinates (x = longitude, y = latitude).
df['line'] = [
    LineString([Point(h_lon, h_lat), Point(c_lon, c_lat)])
    for h_lon, h_lat, c_lon, c_lat in zip(
        df['home_lon'], df['home_lat'], df['curr_lon'], df['curr_lat']
    )
]

In [None]:
# Three layers in WGS84 (EPSG:4326): the migration lines, the hometown
# points and the current-location points.
# Each layer is built from its own copy of df. Constructing a GeoDataFrame
# from a DataFrame can share storage with the input, so without the copies
# a geometry column set on one layer could leak into df or the other layers.
gdf_lines = gpd.GeoDataFrame(df.copy(), geometry='line', crs='EPSG:4326')
gdf_home = gpd.GeoDataFrame(
    df.copy(),
    geometry=gpd.points_from_xy(df['home_lon'], df['home_lat']),
    crs='EPSG:4326',
)
gdf_current = gpd.GeoDataFrame(
    df.copy(),
    geometry=gpd.points_from_xy(df['curr_lon'], df['curr_lat']),
    crs='EPSG:4326',
)

In [None]:
# Plain plot without a basemap: lines first, then the two point layers on top.
fig, ax = plt.subplots(figsize=(14, 8))

gdf_lines.plot(ax=ax, color='red', alpha=0.4, linewidth=0.8)
gdf_home.plot(ax=ax, label='Hometown', color='blue', markersize=10)
gdf_current.plot(ax=ax, label='Current', color='green', markersize=10)

ax.set_title('Drag Queen Migration Map (RPDR)')
ax.legend()
plt.show()

In [None]:
import contextily as ctx

# The basemap tiles are drawn in Web Mercator (EPSG:3857), so all three
# layers are reprojected to it before plotting.
gdf_lines = gdf_lines.to_crs("EPSG:3857")
gdf_home = gdf_home.to_crs("EPSG:3857")
gdf_current = gdf_current.to_crs("EPSG:3857")

fig, ax = plt.subplots(figsize=(14, 8))

# Layers: migration lines underneath, hometowns, then current locations.
gdf_lines.plot(ax=ax, color='red', alpha=0.4, linewidth=0.8)
gdf_home.plot(ax=ax, label='Hometown', color='blue', markersize=6)
gdf_current.plot(ax=ax, label='Current', color='green', markersize=13)

# OpenStreetMap tiles as the background.
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)

# Hide the axes for a cleaner look.
ax.set_axis_off()

ax.set_title('Drag Queen Migration Map (RPDR)')
ax.legend()
plt.show()


In [None]:
# Save through the Figure object created in the previous cell. Once a figure
# has been shown it is no longer pyplot's current figure, so plt.savefig()
# here would write an empty image.
fig.savefig("drag_queen_migration_map.png", dpi=300, bbox_inches='tight')


### Adjustment to current viz

In [None]:
# Count how many rows share each current-location coordinate pair.
location_counts = (
    df.groupby(['curr_lat', 'curr_lon'])
    .size()
    .rename('queen_count')
    .reset_index()
)

In [None]:
# Attach the per-location count to every row that has that location.
df = pd.merge(df, location_counts, how='inner', on=['curr_lat', 'curr_lon'])

In [None]:
# Rebuild the current-location layer from the merged table so that it
# carries 'queen_count'. A copy of df is passed to avoid side effects on it.
gdf_current = gpd.GeoDataFrame(
    df.copy(),
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(x=df['curr_lon'], y=df['curr_lat']),
)

In [None]:
# Show the column names of the rebuilt current-location layer.
print(gdf_current.columns)


In [None]:
# Marker size scales with the number of queens at the location; adjust the
# multiplier as needed.
gdf_current['marker_size'] = gdf_current['queen_count'].mul(10)

In [None]:
# Reproject the rebuilt layer to Web Mercator, like the other plotted layers.
gdf_current = gdf_current.to_crs("EPSG:3857")

In [None]:
# Draw the current locations with marker sizes taken from 'marker_size'.
# NOTE(review): `ax` here is the axes created in an earlier cell, so this
# adds to that already-rendered figure; the next cell builds the full
# scaled map on fresh axes.
gdf_current.plot(ax=ax, color='green', markersize=gdf_current['marker_size'], label='Current (scaled)')


In [None]:
import matplotlib.pyplot as plt
import contextily as ctx

fig, ax = plt.subplots(figsize=(14, 8))

# Layers, bottom to top: migration lines, hometowns, and current locations
# sized by the number of queens at each spot.
gdf_lines.plot(ax=ax, color='red', alpha=0.4, linewidth=0.8)
gdf_home.plot(ax=ax, label='Hometown', color='blue', markersize=10)
gdf_current.plot(
    ax=ax,
    label='Current Location',
    color='green',
    markersize=gdf_current['marker_size'],
)

# Light CartoDB tiles as the background.
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

# Presentation: no axes, a title and a legend.
ax.set_axis_off()
ax.set_title('Drag Queen Migration Map (RPDR)')
ax.legend()

# Write the image to disk first, then display it.
plt.savefig("queen_migration.png", dpi=300, bbox_inches='tight')
plt.show()


### Let's run some queries

In [None]:
# Number of rows left in the table (one per queen).
num_queens = df.shape[0]
print(f"Number of queens: {num_queens}")

In [None]:
import numpy as np


def _haversine_km(lat1, lon1, lat2, lon2, radius_km=6371.0088):
    """Return the great-circle distance in km between points given in degrees."""
    lat1, lon1, lat2, lon2 = (
        np.radians(np.asarray(values, dtype=float))
        for values in (lat1, lon1, lat2, lon2)
    )
    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Clip guards against rounding pushing the value just outside [0, 1].
    return 2 * radius_km * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))


# Projected copy of the lines, kept under this name for the cells below.
gdf_lines_m = gdf_lines.to_crs(epsg=3857)

# Distances come from the latitude/longitude columns, not from the projected
# line length. Web Mercator (EPSG:3857) stretches lengths increasingly with
# distance from the equator, so its "metres" overstate real ground distance.
gdf_lines_m['distance_km'] = _haversine_km(
    gdf_lines_m['home_lat'],
    gdf_lines_m['home_lon'],
    gdf_lines_m['curr_lat'],
    gdf_lines_m['curr_lon'],
)

# Average
average_distance = gdf_lines_m['distance_km'].mean()
print(f"Average migration distance: {average_distance:.2f} km")


In [None]:
# Sample standard deviation (ddof=1, the pandas default) of the distances.
std_distance = gdf_lines_m['distance_km'].std(ddof=1)
print(f"Standard deviation of migration distance: {std_distance:.2f} km")


In [None]:
# Median of the per-queen migration distances.
median_distance = gdf_lines_m.loc[:, 'distance_km'].median()
print(f"Median migration distance: {median_distance:.2f} km")


In [None]:
import matplotlib.pyplot as plt

# Histogram of the per-queen migration distances.
plt.hist(gdf_lines_m['distance_km'], bins=20, color='blue', edgecolor='black')
plt.title("Distribution of Migration Distances")
plt.xlabel("Distance (km)")
plt.ylabel("Frequency")
# Save before showing: once plt.show() has displayed the figure it is no
# longer the current figure, and a later savefig() writes an empty image.
plt.savefig("queen_dis_distrib.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Mean migration distance per 'Hometown' value. The grouping runs on
# gdf_lines_m because that is the frame holding 'distance_km'; df has no
# such column, so grouping df raised a KeyError.
# A coarser region column (e.g. a state) could be substituted if one exists.
avg_distance_by_region = gdf_lines_m.groupby('Hometown')['distance_km'].mean()
print(avg_distance_by_region)


In [None]:
import folium
from folium import GeoJson, GeoJsonTooltip

In [None]:
# Back to latitude/longitude (EPSG:4326) for the folium map.
gdf_lines = gdf_lines.to_crs("EPSG:4326")
gdf_home = gdf_home.to_crs("EPSG:4326")
gdf_current = gdf_current.to_crs("EPSG:4326")

In [None]:
# Create the interactive map with a fixed centre point, given as
# [latitude, longitude], and a starting zoom level. The mean of all
# coordinates could be used as the centre instead.
m = folium.Map(location=[39.5, -98.35], zoom_start=4, tiles="CartoDB positron")

In [None]:
# Migration lines layer; hovering over a line shows its 'Queen' value.
folium.GeoJson(
    gdf_lines,
    style_function=lambda _feature: dict(color='red', weight=2, opacity=0.5),
    tooltip=GeoJsonTooltip(fields=['Queen'], aliases=['Queen:']),
).add_to(m)

In [None]:
# One small blue circle per hometown; the popup shows the queen's name.
# folium expects [latitude, longitude], i.e. [y, x] of the point.
for point, queen in zip(gdf_home['geometry'], gdf_home['Queen']):
    marker = folium.CircleMarker(
        location=[point.y, point.x],
        radius=3,
        color='blue',
        fill=True,
        fill_opacity=0.8,
        popup=queen,
    )
    marker.add_to(m)

In [None]:
# One green circle per current location; the radius is 'marker_size' / 10
# and the popup shows the queen's name.
for point, size, queen in zip(
    gdf_current['geometry'], gdf_current['marker_size'], gdf_current['Queen']
):
    marker = folium.CircleMarker(
        location=[point.y, point.x],
        radius=size / 10,
        color='green',
        fill=True,
        fill_opacity=0.8,
        popup=queen,
    )
    marker.add_to(m)


In [None]:
# Preview the first rows of the migration-lines layer.
gdf_lines.head()
