# Scrape Data

In [1]:
from top_github_scraper import (get_top_repo_urls, get_top_repos, get_top_contributors, 
get_top_user_urls, get_top_users)
#import datapane as dp 
import pandas as pd 
from tqdm import tqdm 
from folium import plugins
import geopandas
from geopy.geocoders import Nominatim
import folium
from folium.plugins import Search


In [2]:
# Search term handed to the scraper calls in the cells below.
keyword = "data science"

## Get top contributors

In [3]:
# Fetch contributor data for `keyword` via top_github_scraper.
# NOTE(review): `stop_page=2` presumably caps how many result pages are scraped - confirm against the library docs.
# NOTE(review): the recorded run of this cell raised "ValueError: No objects to concatenate" and
# none of the later cells were executed; check that the scraper actually returns results
# (possibly a credentials / rate-limit problem - unverified) before relying on the rest of the notebook.
contributors = get_top_contributors(keyword, stop_page=2)

ValueError: No objects to concatenate

In [None]:
contributors

In [None]:
# Keep only the first occurrence of each fully identical contributor row.
contributors = contributors.drop_duplicates()

In [None]:
contributors.head(10)

## Get top users

In [None]:
# Fetch user data for `keyword` via top_github_scraper.
# NOTE(review): `stop_page=10` presumably caps how many result pages are scraped - confirm against the library docs.
users = get_top_users(keyword, stop_page=10)

In [None]:
users.head(10)

## Combine data

In [None]:
# Stack contributors on top of users, then discard every row that is an exact
# repeat of an earlier one (the first occurrence is kept).
all_users = pd.concat([contributors, users]).drop_duplicates()

In [None]:
# Build each profile link from the account login. Vectorised concatenation
# replaces the per-element lambda and propagates a missing login as a missing
# URL instead of raising TypeError on `str + float`.
all_users['real_url'] = 'https://github.com/' + all_users['login']

In [None]:
all_users.head(10)

# Map

In [None]:
# geopy Nominatim geocoder client shared by the geocoding helpers below;
# `user_agent` is the identifier sent with each request.
geolocator = Nominatim(user_agent='my_app')

# Keep only users whose `location` field is filled in. The explicit .copy()
# yields an independent frame, so the column assignments in later cells do not
# write into a slice of `all_users` (pandas' SettingWithCopyWarning).
all_users_with_locations = all_users.dropna(subset=['location']).copy()

In [None]:
def get_locations(location: str):
    """Geocode a free-text location with the module-level ``geolocator``.

    Args:
        location: Free-text place description to look up.

    Returns:
        Whatever ``geolocator.geocode`` returns for ``location``, or ``None``
        when the lookup raises an exception.
    """
    try:
        return geolocator.geocode(location)
    except Exception:
        # Best effort: a failed lookup yields None so the row can be dropped
        # afterwards. Catching Exception (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit still stop a long geocoding pass.
        return None
def get_lat(location):
    """Return the ``latitude`` attribute of ``location``."""
    latitude = location.latitude
    return latitude
   
def get_lon(location):
    """Return the ``longitude`` attribute of ``location``."""
    longitude = location.longitude
    return longitude

In [None]:
# Replace each free-text location with the result of get_locations.
# `assign` builds a new frame instead of writing through attribute access on a
# filtered slice of `all_users`, which triggers pandas' SettingWithCopyWarning.
all_users_with_locations = all_users_with_locations.assign(
    location=all_users_with_locations["location"].apply(get_locations)
)

In [None]:
# One non-mutating pipeline (no `inplace=True` on a filtered copy, which is
# what provokes SettingWithCopyWarning):
#   1. drop rows whose `location` is missing,
#   2. renumber the index from 0,
#   3. replace every remaining missing value, in any column, with ''.
all_users_with_locations = (
    all_users_with_locations
    .dropna(subset=['location'])
    .reset_index(drop=True)
    .fillna('')
)

In [None]:
# Derive latitude, longitude and address columns from each entry of `location`.
all_users_with_locations = all_users_with_locations.assign(
    latitudes=all_users_with_locations['location'].apply(get_lat),
    longitudes=all_users_with_locations['location'].apply(get_lon),
    address=all_users_with_locations['location'].apply(lambda place: place.address),
)

In [None]:
def get_city(lat, lon):
    """Reverse-geocode a coordinate pair and return its city name.

    Args:
        lat: Latitude; formatted with ``str`` into the query.
        lon: Longitude; formatted with ``str`` into the query.

    Returns:
        The ``'city'`` entry of the result's ``raw['address']`` mapping, or
        ``''`` when the lookup returns nothing, the result has no address
        details, or the address has no ``'city'`` key.
    """
    location = geolocator.reverse(f"{lat!s},{lon!s}")
    if location is None:
        # The geocoder found nothing for this point; previously this crashed
        # with AttributeError on `None.raw`.
        return ''
    address = location.raw.get('address', {})
    return address.get('city', '')

In [None]:
# Look up a city name for every coordinate pair (one get_city call per row).
# Iterating the two coordinate columns directly avoids building a Series for
# each row, and also behaves for an empty frame, where row-wise
# `DataFrame.apply(axis=1)` can hand back a DataFrame that cannot be assigned
# to a single column.
all_users_with_locations['city'] = [
    get_city(lat, lon)
    for lat, lon in zip(
        all_users_with_locations['latitudes'],
        all_users_with_locations['longitudes'],
    )
]

In [None]:
# Promote the frame to a GeoDataFrame whose geometry is one point per row,
# with longitude as x and latitude as y.
all_users_with_locations = geopandas.GeoDataFrame(
    all_users_with_locations,
    geometry=geopandas.points_from_xy(
        x=all_users_with_locations['longitudes'],
        y=all_users_with_locations['latitudes'],
    ),
)

In [None]:
all_users_with_locations.head(5)

In [None]:
# Tag the geometry as EPSG:4326 and remove the `location` column.
all_users_with_locations = (
    all_users_with_locations
    .set_crs("EPSG:4326")
    .drop(columns=['location'])
)

In [None]:
# Centre the map on the first user's coordinates. float() keeps the fractional
# degrees (int() truncated them, shifting the centre by up to one degree) and
# turns the value read from the frame into a plain Python number.
lat_0 = float(all_users_with_locations.latitudes.iloc[0])
lon_0 = float(all_users_with_locations.longitudes.iloc[0])

m = folium.Map(location=[lat_0, lon_0], zoom_start=4)

# Add markers

# Columns shown in each marker's tooltip, paired position-by-position with the
# human-readable labels in `aliases`; both lists must stay the same length.
# NOTE(review): every name in `fields` is assumed to be a column of
# all_users_with_locations - several come from the scraper output, confirm.
fields = ["login", "real_url", "type",
          "name", "company", "address", "city", "email",
          "bio", "followers", "following",
          "public_repos", "public_gists"]
aliases = ["Login Name", "URL", "Type",
          "Name", "Company", "Address", "City", "Email",
          "Bio", "Followers", "Following",
          "Number of Public Repos", "Number of Public Gists"]

# One GeoJson layer holding every user, with the tooltip defined above.
citygeo = folium.GeoJson(
    all_users_with_locations,
    tooltip=folium.GeoJsonTooltip(
         fields=fields, aliases=aliases, localize=True
    ),
).add_to(m)

# Add search
# Search control over the layer above, matching on each feature's "city" value.
citysearch = Search(
    layer=citygeo,
    geom_type="Point",
    placeholder="Search for a City",
    collapsed=True,
    search_label="city",
).add_to(m)

In [None]:
m 