In [1]:
import os
import pandas as pd
import numpy as np
import json
import janitor

pd.set_option('display.max_columns', 120)
pd.set_option('display.max_rows', 100)

from utilities import read_jsons

In [2]:
# Read in 100 random sample
df_random_sample = (
    pd.read_csv("../input/random_sample.csv")
    .assign(slug=lambda df: df["github"].str.replace("https://github.com/", ""))
    .assign(slug=lambda df: df["slug"].apply(lambda x: x[:-1] if x.endswith("/") else x))
    .assign(slug=lambda df: df["slug"].str.lower().str.strip())
#     .assign(fileslug=lambda df: df["slug"].str.replace("/", "_"))
    .assign(treated=1)
    # Assign the two types
    .assign(boughtstars=lambda df: (df.index < 25).astype(int))
    .assign(treated2=lambda df: df["boughtstars"] + df["treated"])
)
df_random_sample.head(3)

Unnamed: 0,pkg,return_code,github_url,homepage,earliest_release,gh_url_check,github,pypi,label,slug,treated,boughtstars,treated2
0,bird-ospf-link-db-parser,200.0,https://github.com/Andrew-Dickinson/bird-ospf-...,,2023-04-29T07:23:44,1.0,https://github.com/Andrew-Dickinson/bird-ospf-...,https://pypi.org/project/bird-ospf-link-db-par...,bird-ospf-link-db-parser\r\nhttps://github.com...,andrew-dickinson/bird-ospf-link-db-parser,1,1,2
1,asciicli,200.0,https://github.com/mrq-andras/asciicli,https://github.com/mrq-andras/asciicli,2023-04-28T07:22:55,1.0,https://github.com/mrq-andras/asciicli,https://pypi.org/project/asciicli/#history,asciicli\r\nhttps://github.com/mrq-andras/asci...,mrq-andras/asciicli,1,1,2
2,bdpotentiometer,200.0,https://github.com/bond-anton/BDPotentiometer,https://github.com/bond-anton/BDPotentiometer,2023-04-27T06:35:18,1.0,https://github.com/bond-anton/BDPotentiometer,https://pypi.org/project/bdpotentiometer/#history,bdpotentiometer\r\nhttps://github.com/bond-ant...,bond-anton/bdpotentiometer,1,1,2


In [3]:
# Repos characteristics
payloads = read_jsons("../output/repo_profile_payload/")

df_repos = (
    pd.DataFrame(payloads)
    .dropna(subset=["id"], ignore_index=True)
    .assign(slug=lambda df: df["full_name"].str.lower().str.strip())
    ## Get treatment assignment ----------------------------------------------
    .merge(df_random_sample, how="left", on="slug", validate="1:1", indicator=True)
    .assign(treated=lambda df: df["treated"].fillna(0).apply(int))
    .assign(treated2=lambda df: df["treated2"].fillna(0).apply(int))
    .assign(created_at=lambda df: pd.to_datetime(df['created_at']))
    .assign(year_created=lambda df: [dt.year for dt in df["created_at"]])
    ## Cleaning up additional features ---------------------------------------
    .assign(license_str=lambda df: [license["spdx_id"] if license else None for license in df["license"]])
    .assign(n_topics=lambda df: [len(topics) for topics in df["topics"]])
    # https://stackoverflow.com/a/8679592
    .assign(size_mb=lambda df: df["size"]/1024)
    .assign(is_org=lambda df: np.where(df["organization"].isna(), 0, 1))
    .assign(user=lambda df: [slug.split("/")[0] for slug in df["slug"]])
    .assign(owner_str=lambda df: [owner["login"].lower().strip() if owner else None for owner in df["owner"]])
    .assign(description_size=lambda df: df["description"].str.len().fillna(0).apply(int))
#     ## Getting readme stats --------------------------------------------------
#     .merge((pd.read_csv("../output/pypi_readme.csv", 
#                         usecols=["slug", "n_requirements", "raw_readme_len", "processed_readme_len"])), 
#            how="left", on="slug", validate="1:1"
#           )
)
assert (df_repos["user"] == df_repos["owner_str"]).all()
df_repos.head()

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,forks_url,keys_url,collaborators_url,teams_url,hooks_url,issue_events_url,events_url,assignees_url,branches_url,tags_url,blobs_url,git_tags_url,git_refs_url,trees_url,statuses_url,languages_url,stargazers_url,contributors_url,subscribers_url,subscription_url,commits_url,git_commits_url,comments_url,issue_comment_url,contents_url,compare_url,merges_url,archive_url,downloads_url,issues_url,pulls_url,milestones_url,notifications_url,labels_url,releases_url,deployments_url,created_at,updated_at,pushed_at,git_url,ssh_url,clone_url,svn_url,homepage_x,size,stargazers_count,watchers_count,language,has_issues,has_projects,has_downloads,has_wiki,has_pages,has_discussions,forks_count,mirror_url,archived,disabled,open_issues_count,license,allow_forking,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,permissions,temp_clone_token,network_count,subscribers_count,organization,parent,source,message,documentation_url,template_repository,slug,pkg,return_code,github_url,homepage_y,earliest_release,gh_url_check,github,pypi,label,treated,boughtstars,treated2,_merge,year_created,license_str,n_topics,size_mb,is_org,user,owner_str,description_size
0,632697072.0,R_kgDOJbYw8A,ezfinpy,renanmoretto/ezfinpy,False,"{'login': 'renanmoretto', 'id': 103861667, 'no...",https://github.com/renanmoretto/ezfinpy,,False,https://api.github.com/repos/renanmoretto/ezfinpy,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,https://api.github.com/repos/renanmoretto/ezfi...,2023-04-26 00:19:26+00:00,2023-05-03T00:57:56Z,2023-05-03T19:11:09Z,git://github.com/renanmoretto/ezfinpy.git,git@github.com:renanmoretto/ezfinpy.git,https://github.com/renanmoretto/ezfinpy.git,https://github.com/renanmoretto/ezfinpy,,17.0,1.0,1.0,Python,True,True,True,True,False,False,0.0,,False,False,0.0,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",True,False,False,[],public,0.0,0.0,1.0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,0.0,1.0,,,,,,,renanmoretto/ezfinpy,,,,,,,,,,0,,0,left_only,2023,MIT,0,0.016602,0,renanmoretto,renanmoretto,0
1,629920730.0,R_kgDOJYvT2g,statplot,dingyizhao/statplot,False,"{'login': 'dingyizhao', 'id': 46778380, 'node_...",https://github.com/dingyizhao/statplot,Common plot code used in astrophysics,False,https://api.github.com/repos/dingyizhao/statplot,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,https://api.github.com/repos/dingyizhao/statpl...,2023-04-19 09:44:40+00:00,2023-04-25T08:11:21Z,2023-04-25T08:11:17Z,git://github.com/dingyizhao/statplot.git,git@github.com:dingyizhao/statplot.git,https://github.com/dingyizhao/statplot.git,https://github.com/dingyizhao/statplot,,3.0,0.0,0.0,Python,True,True,True,True,False,False,0.0,,False,False,0.0,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",True,False,False,[],public,0.0,0.0,0.0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,0.0,1.0,,,,,,,dingyizhao/statplot,,,,,,,,,,0,,0,left_only,2023,MIT,0,0.00293,0,dingyizhao,dingyizhao,37
2,611058264.0,R_kgDOJGwCWA,imgutils,deepghs/imgutils,False,"{'login': 'deepghs', 'id': 126587470, 'node_id...",https://github.com/deepghs/imgutils,A convenient and user-friendly anime-style ima...,False,https://api.github.com/repos/deepghs/imgutils,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,https://api.github.com/repos/deepghs/imgutils/...,2023-03-08 02:32:20+00:00,2023-05-16T13:13:49Z,2023-05-18T08:56:31Z,git://github.com/deepghs/imgutils.git,git@github.com:deepghs/imgutils.git,https://github.com/deepghs/imgutils.git,https://github.com/deepghs/imgutils,https://deepghs.github.io/imgutils/,172568.0,2.0,2.0,Python,True,True,True,True,True,False,0.0,,False,False,2.0,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",True,False,False,"[anime, image-processing, python]",public,0.0,2.0,2.0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,0.0,1.0,"{'login': 'deepghs', 'id': 126587470, 'node_id...",,,,,,deepghs/imgutils,,,,,,,,,,0,,0,left_only,2023,MIT,3,168.523438,1,deepghs,deepghs,141
3,511216783.0,R_kgDOHniMjw,robotframework-robosapiens,imbus/robotframework-robosapiens,False,"{'login': 'imbus', 'id': 67375753, 'node_id': ...",https://github.com/imbus/robotframework-robosa...,Fully localized Robot Framework library for au...,False,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,https://api.github.com/repos/imbus/robotframew...,2022-07-06 16:40:13+00:00,2023-04-24T17:32:16Z,2023-05-12T11:04:34Z,git://github.com/imbus/robotframework-robosapi...,git@github.com:imbus/robotframework-robosapien...,https://github.com/imbus/robotframework-robosa...,https://github.com/imbus/robotframework-robosa...,https://imbus.github.io/robotframework-robosap...,991.0,0.0,0.0,Python,True,True,True,True,True,False,0.0,,False,False,0.0,"{'key': 'apache-2.0', 'name': 'Apache License ...",True,False,False,"[gui-automation, gui-testing, robotframework]",public,0.0,0.0,0.0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,0.0,2.0,"{'login': 'imbus', 'id': 67375753, 'node_id': ...",,,,,,imbus/robotframework-robosapiens,,,,,,,,,,0,,0,left_only,2022,Apache-2.0,3,0.967773,1,imbus,imbus,86
4,630769416.0,R_kgDOJZjHCA,amdgpu_stats,joshlay/amdgpu_stats,False,"{'login': 'joshlay', 'id': 10002815, 'node_id'...",https://github.com/joshlay/amdgpu_stats,A Python module/TUI that provides AMD GPU stat...,False,https://api.github.com/repos/joshlay/amdgpu_stats,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,https://api.github.com/repos/joshlay/amdgpu_st...,2023-04-21 05:44:05+00:00,2023-04-30T22:47:02Z,2023-05-12T02:41:09Z,git://github.com/joshlay/amdgpu_stats.git,git@github.com:joshlay/amdgpu_stats.git,https://github.com/joshlay/amdgpu_stats.git,https://github.com/joshlay/amdgpu_stats,,745.0,1.0,1.0,Python,True,True,True,True,False,False,0.0,,False,False,0.0,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",True,False,False,[],public,0.0,0.0,1.0,master,"{'admin': False, 'maintain': False, 'push': Fa...",,0.0,2.0,,,,,,,joshlay/amdgpu_stats,,,,,,,,,,0,,0,left_only,2023,MIT,0,0.727539,0,joshlay,joshlay,52


In [4]:
# Users characteristics
df_users = (
    pd.read_csv("../output/users_profile.csv")
    .assign(user=lambda df: df["login"].str.lower().str.strip())
    .drop_duplicates("user")
    ## Get treatment assignment
    .merge((
        df_repos
        .select_columns(["user", "treated"])
        .drop_duplicates(["user", "treated"])
    ), how="left", on="user", validate="1:m"
    )
    .assign(treated=lambda df: df["treated"].fillna(0).apply(int))    
    ## Cleaning up additional features
    .assign(created_at=lambda df: pd.to_datetime(df['created_at']))
    .assign(year_created=lambda df: [dt.year for dt in df["created_at"]])    
    .assign(updated_at=lambda df: pd.to_datetime(df['created_at']))
    .assign(year_updated=lambda df: [dt.year for dt in df["updated_at"]])    
    .assign(list_co=lambda df: np.where(df["company"].isna(), 0, 1))
    .assign(list_loc=lambda df: np.where(df["location"].isna(), 0, 1))
    .assign(list_email=lambda df: np.where(df["email"].isna(), 0, 1))
    .assign(list_blog=lambda df: np.where(df["blog"].isna(), 0, 1))
    .assign(list_bio=lambda df: np.where(df["bio"].isna(), 0, 1))
    .assign(bio_size=lambda df: df["bio"].str.len().fillna(0).apply(int))
)
df_users.head()

Unnamed: 0,login,id,node_id,avatar_url,gravatar_id,url,html_url,followers_url,following_url,gists_url,starred_url,subscriptions_url,organizations_url,repos_url,events_url,received_events_url,type,site_admin,name,company,blog,location,email,hireable,bio,twitter_username,public_repos,public_gists,followers,following,created_at,updated_at,retrieval_date,user,treated,year_created,year_updated,list_co,list_loc,list_email,list_blog,list_bio,bio_size
0,Arsybai,33319709,MDQ6VXNlcjMzMzE5NzA5,https://avatars.githubusercontent.com/u/333197...,,https://api.github.com/users/Arsybai,https://github.com/Arsybai,https://api.github.com/users/Arsybai/followers,https://api.github.com/users/Arsybai/following...,https://api.github.com/users/Arsybai/gists{/gi...,https://api.github.com/users/Arsybai/starred{/...,https://api.github.com/users/Arsybai/subscript...,https://api.github.com/users/Arsybai/orgs,https://api.github.com/users/Arsybai/repos,https://api.github.com/users/Arsybai/events{/p...,https://api.github.com/users/Arsybai/received_...,User,False,Arsybai,Clee Ltd,http://arsybai.com,indonesia,me@arsybai.com,,Just want to death by cuteness\r\n,,36,0,64,7,2017-11-02 12:38:27+00:00,2017-11-02 12:38:27+00:00,2023-05-20 21:41:26.180953,arsybai,0,2017,2017,1,1,1,1,1,32
1,GaNiziolek,54728889,MDQ6VXNlcjU0NzI4ODg5,https://avatars.githubusercontent.com/u/547288...,,https://api.github.com/users/GaNiziolek,https://github.com/GaNiziolek,https://api.github.com/users/GaNiziolek/followers,https://api.github.com/users/GaNiziolek/follow...,https://api.github.com/users/GaNiziolek/gists{...,https://api.github.com/users/GaNiziolek/starre...,https://api.github.com/users/GaNiziolek/subscr...,https://api.github.com/users/GaNiziolek/orgs,https://api.github.com/users/GaNiziolek/repos,https://api.github.com/users/GaNiziolek/events...,https://api.github.com/users/GaNiziolek/receiv...,User,False,Gabriel Niziolek,@TempoX-Ltda,,Brazil,,,Software Developer on TempoX | Django | Pyrami...,gniziolek,25,3,6,15,2019-08-31 00:19:09+00:00,2019-08-31 00:19:09+00:00,2023-05-20 21:41:26.180953,ganiziolek,1,2019,2019,1,1,0,0,1,68
2,MihailSalnikov,2613180,MDQ6VXNlcjI2MTMxODA=,https://avatars.githubusercontent.com/u/261318...,,https://api.github.com/users/MihailSalnikov,https://github.com/MihailSalnikov,https://api.github.com/users/MihailSalnikov/fo...,https://api.github.com/users/MihailSalnikov/fo...,https://api.github.com/users/MihailSalnikov/gi...,https://api.github.com/users/MihailSalnikov/st...,https://api.github.com/users/MihailSalnikov/su...,https://api.github.com/users/MihailSalnikov/orgs,https://api.github.com/users/MihailSalnikov/repos,https://api.github.com/users/MihailSalnikov/ev...,https://api.github.com/users/MihailSalnikov/re...,User,False,Mikhail Salnikov,NLP Group,https://medium.com/@MSalnikov,,,True,Researcher in DL and NLP,,27,24,7,2,2012-10-21 15:03:02+00:00,2012-10-21 15:03:02+00:00,2023-05-20 21:41:26.180953,mihailsalnikov,0,2012,2012,1,0,0,1,1,24
3,Simply-Artificial,130693929,O_kgDOB8o7KQ,https://avatars.githubusercontent.com/u/130693...,,https://api.github.com/users/Simply-Artificial,https://github.com/Simply-Artificial,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,https://api.github.com/users/Simply-Artificial...,Organization,False,Simply Artificial,,,,simply-artificial@itsmealfie0.com,,,,2,0,1,0,2023-04-13 17:37:21+00:00,2023-04-13 17:37:21+00:00,2023-05-20 21:41:26.180953,simply-artificial,0,2023,2023,0,0,1,0,0,0
4,jposada202020,34255413,MDQ6VXNlcjM0MjU1NDEz,https://avatars.githubusercontent.com/u/342554...,,https://api.github.com/users/jposada202020,https://github.com/jposada202020,https://api.github.com/users/jposada202020/fol...,https://api.github.com/users/jposada202020/fol...,https://api.github.com/users/jposada202020/gis...,https://api.github.com/users/jposada202020/sta...,https://api.github.com/users/jposada202020/sub...,https://api.github.com/users/jposada202020/orgs,https://api.github.com/users/jposada202020/repos,https://api.github.com/users/jposada202020/eve...,https://api.github.com/users/jposada202020/rec...,User,False,jposada202020,,https://mastodon.social/@jposada202020,,,,,,41,5,11,7,2017-12-05 00:05:32+00:00,2017-12-05 00:05:32+00:00,2023-05-20 21:41:26.180953,jposada202020,0,2017,2017,0,0,0,1,0,0


In [5]:
df_repos.to_csv("../output/repo_baselines.csv", index=False)
df_users.to_csv("../output/user_baselines.csv", index=False)