## Extract User data

This is the first notebook, and it works on the user data.
In this notebook we analyse the raw user data and filter it by country and organization.


In [23]:
import json
import requests

In [24]:
# Loading raw user data.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's default text encoding.
with open('./data/users.json', encoding='utf-8') as f:
    data=json.load(f)

In [25]:
print(len(data["result"])) # Number of user records under the "result" key of the raw dump (about 158K)

158517


In [26]:
print(data["result"][0]) # Inspect the first user record to see which fields a user carries (this one is 'tourist')

{'lastName': 'Korotkevich', 'country': 'Belarus', 'lastOnlineTimeSeconds': 1542416129, 'city': 'Gomel', 'rating': 3372, 'friendOfCount': 15538, 'titlePhoto': '//userpic.codeforces.com/422/title/6dd5ba0df6a2f834.jpg', 'handle': 'tourist', 'avatar': '//userpic.codeforces.com/422/avatar/1d4e692d6695dfa6.jpg', 'firstName': 'Gennady', 'contribution': 154, 'organization': 'ITMO University', 'rank': 'legendary grandmaster', 'maxRating': 3739, 'registrationTimeSeconds': 1265987288, 'maxRank': 'legendary grandmaster'}


In [27]:
# Keep only the users whose profile lists India as the country.
# Records without a "country" field are dropped as well.

indianUsers = [
    profile
    for profile in data["result"]
    if profile.get("country") == 'India'
]

len(indianUsers) # Number of Indian users (about 11.5K)

11581

In [28]:

# Group the Indian users by the organization named in their profile

organizationList = {}
for member in indianUsers:
    orgName = member.get("organization", '')
    if orgName == '':
        continue  # organization field is missing or left blank
    # Start a new group on first sight of the organization, then add the user
    organizationList.setdefault(orgName, []).append(member)

# Number of distinct organizations (959 organizations)
print(len(organizationList))


959


In [29]:
# Keep only the organizations that have more than 20 users

# Names first, in the iteration order of organizationList ...
filteredListNames = [
    orgName
    for orgName, members in organizationList.items()
    if len(members) > 20
]
# ... then the matching name -> users mapping, inserted in that same order
filteredList = {orgName: organizationList[orgName] for orgName in filteredListNames}

# Number of organizations with more than 20 users
print(len(filteredListNames))


83


In [30]:
filteredListNames # Names of the organizations that passed the size filter (more than 20 users)

['DA-IICT',
 'IIIT Hyderabad',
 'Indian Institute of Technology Kharagpur',
 'IIT Kanpur',
 'Indian Institute of Technology Madras',
 'IIIT Delhi',
 'IIT Roorkee',
 'IIT Delhi',
 'Jaypee Institute of Information Technology,Noida',
 'IIT BHU Varanasi',
 'BITS, Pilani',
 'IIIT Bangalore',
 'Nirma University',
 'BITS Pilani Hyderabad Campus',
 'IIT Patna',
 'SSN College Of Engineering',
 'MNNIT, Allahabad',
 'IIT Bombay',
 'IIT Indore',
 'Delhi Technological University',
 'NITK , Surathkal',
 'IIT Jodhpur',
 'IIT Guwahati',
 'BITS Pilani Goa Campus',
 'IIT Hyderabad',
 'IIIT Allahabad',
 'IIT Mandi',
 'IIT Kharagpur',
 'IIT BHU, Varanasi',
 'Netaji Subhas Institute of Technology, Delhi',
 'NIT Kurukshetra',
 'National Institute of Technology Patna',
 'Indian School of Mines',
 'NIT Warangal',
 'Indian Institute of Technology Delhi',
 'LNMIIT',
 'BIT, Mesra',
 'BITS Pilani, Pilani campus',
 'Vellore Institute of Technology,Vellore',
 'Indian Institute Of Technology Varanasi',
 'ABV-IIITM G

In [31]:
filteredList['IIIT Bangalore'] # Sanity check: show the user records grouped under 'IIIT Bangalore'

[{'avatar': '//userpic.codeforces.com/264494/avatar/b603d409c06858e3.jpg',
  'city': 'Mumbai',
  'contribution': 36,
  'country': 'India',
  'firstName': 'Aditya',
  'friendOfCount': 643,
  'handle': 'VastoLorde95',
  'lastName': 'Paliwal',
  'lastOnlineTimeSeconds': 1542438888,
  'maxRank': 'master',
  'maxRating': 2228,
  'organization': 'IIIT Bangalore',
  'rank': 'master',
  'rating': 2215,
  'registrationTimeSeconds': 1419091653,
  'titlePhoto': '//userpic.codeforces.com/264494/title/8758ff22f17e4cbd.jpg'},
 {'avatar': '//userpic.codeforces.com/265656/avatar/ca4aeae5665589fd.jpg',
  'city': 'Mumbai',
  'contribution': 0,
  'country': 'India',
  'firstName': 'Simran',
  'friendOfCount': 203,
  'handle': 'simrandokania',
  'lastName': 'Dokania',
  'lastOnlineTimeSeconds': 1538107895,
  'maxRank': 'expert',
  'maxRating': 1814,
  'organization': 'IIIT Bangalore',
  'rank': 'expert',
  'rating': 1814,
  'registrationTimeSeconds': 1419588039,
  'titlePhoto': '//userpic.codeforces.com/2

In [32]:
# Number of users in filtered organizations

# We want to know the number of contests each user has participated in.
# This takes one API call per user, and the calls sometimes fail, so each call is
# wrapped in a try/except block. Users that already carry a "contestCount" are
# skipped, so this cell can simply be re-run until it reports no failures.

REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection would block the cell forever

userCount=0
failedHandles=[]  # handles whose request failed during this run
# The context manager closes the session's pooled connections when the loop ends.
with requests.Session() as s:
    for org in filteredList:
        for user in filteredList[org]:
            userCount+=1
            if "contestCount" not in user:
                try:
                    response = s.get(
                        "https://codeforces.com/api/user.rating",
                        params={"handle": user["handle"]},  # let requests URL-encode the handle
                        timeout=REQUEST_TIMEOUT,
                    )
                    response.raise_for_status()
                    # The "result" entry is a list; its length is stored as the contest count.
                    user["contestCount"]= len(response.json()["result"])
                    print(user["handle"],userCount)
                except (requests.RequestException, ValueError, KeyError, TypeError) as err:
                    # Network/HTTP errors, a body that is not JSON, or a reply without "result".
                    # KeyboardInterrupt is deliberately not caught so the cell can be interrupted.
                    failedHandles.append(user["handle"])
                    print("FAIL",user["handle"],repr(err))

# Tells the reader whether the cell has to be run again.
print(len(failedHandles), "request(s) failed in this run")


Sumeet.Varma 1
akshay_miterani 2
kuldeeppatel 3
tanmay273 4
Hiren.Vaghela 5
lord_bendtner 6
yashkumar18 7
Learner99 8
aayushkapadia 9
tarang1610 10
Legend_Goku 11
Rutvik_Kothari 12
Beerus_Sama 13
MoHib85 14
Hardik.decoder 15
RP_9 16
samarthkothari 17
ronakvaghela45 18
HardikDobariya 19
kunal_khatri 20
kunalk 21
RAMBO_tejasv 22
a_g2009 23
Maulik_Patel 24
aya_cool 25
PriyamV_181 26
dhruval161 27
Sabuwala.Mustafa 28
AwwwMeraBacha 29
Sagar_Savaliya 30
U_Square 31
uttam36 32
dhrumil140396 33
Oreo_85 34
--UMANG-- 35
shirishJain 36
akdent 37
tapanr97 38
Grind 39
_Mr_Fab 40
kishanradia 41
Alinus 42
hhvys 43
suparsh14 44
Ajudiya_13 45
bsrkaditya 46
HMK 47
hardik 48
karan_thakkar 49
amit2112 50
shreyansh08 51
umang._.panchal 52
het 53
anandpatel9998 54
shahjay147 55
ujaval280498 56
Mihir_Gajera1 57
Gautam_009 58
artifexbm 59
da_201501181 60
paresh 61
shahsaurabh0605 62
--HK-- 63
rpthegreat 64
prerakd 65
gbrocks 66
no_1999 67
speedster7795 68
Meet.Sinojia 69
harshvasoya008 70
brightcoder 71
pH7 7

paranoid_coder 565
prabhakarbikkaneti 566
rishikasharma 567
codeBloooded 568
Joker_mahesh 569
palash_1511 570
schowdhury 571
Aditya0304 572
persistence 573
evil666man 574
biswajitsc 575
ChestnutRice 576
anuraganand 577
acraider 578
nanu 579
arkanath 580
sanyam 581
piyushrathipr 582
manavs19 583
rahul_iit 584
sourrvvvyy 585
gupta97 586
ninda029 587
saurabh060792 588
sauravray2587 589
ragnarok0211 590
adi1998 591
fsociety00 592
raghav_agarwal 593
asquare10 594
shrihariabhat 595
meetjoshi 596
arnavkj95 597
jatin_27 598
rajeevkgp 599
sidrakesh 600
sudeepraja94 601
tejas3037 602
sumanth232 603
LaserBoy 604
ayush07 605
shubhparekh 606
shashwat001 607
ankeshanand 608
p_d 609
rakesh411 610
arijitpanigrahy 611
rossatron 612
pinku22 613
Code_Lion 614
manyfacedboy 615
nks43 616
hargup 617
Code_chat 618
rajrishav 619
skgdkg 620
sasiprasanth 621
harne.sudutt11 622
VikramVarun 623
dipayan92 624
harshitgargjpr 625
Rik001 626
Jatin86400 627
tejus_gupta 628
cvikas54 629
Jilun 630
abhishek12 631
pramesh

akulkanojia2414 1115
jsuyash1514 1116
adityaprakash 1117
gurwinderiitr 1118
joker_123 1119
nitin_iitr 1120
TheDarthvader 1121
maurice37 1122
rk.avi 1123
swap96 1124
aagarwal9782 1125
kanav99 1126
ghoom1 1127
barsha 1128
sanjuprk 1129
shivansh100 1130
umsh1ume 1131
vershwal 1132
parateekR 1133
Pirate_joker 1134
rachit_parikh 1135
shawak 1136
shubhamsingh.araria 1137
amanp1151 1138
Natsu6767 1139
harry_game 1140
Manoj_Parihar 1141
shikhar.bsar 1142
kk91225 1143
kanishk1225 1144
incognito_nag 1145
spears 1146
anand.101b 1147
supercoolcoder 1148
ramakanthd92 1149
samruddhi_mane 1150
amarnath1031 1151
aishiitr 1152
johny23 1153
gautamgtm 1154
spunk1803 1155
heena_rana 1156
manikantanallagatla 1157
dhanju 1158
nashmathur 1159
anjalinauhwar 1160
itstimeforme 1161
ankitaiitr 1162
xrox 1163
rishav_golu 1164
mssvharsha555 1165
AayushAC1998 1166
karan1808 1167
pdev250 1168
aquaabh 1169
pbhadu21 1170
achuth_noob 1171
Just_code_it 1172
Shivansh 1173
abhiiitr6002 1174
kunal017 1175
ambar_iitr 1176
b

piyushiitv 1628
sanket_11 1629
ssn 1630
shreyas_21 1631
uddeshya2257 1632
manuag 1633
Saurabh303 1634
pavanmggp 1635
y_malik 1636
nishantwrp 1637
praneethcoder 1638
piyushmaurya 1639
eternallearner 1640
__SemiColon__ 1641
revanth000 1642
zigyasu 1643
kunal.agarwal67 1644
sans21 1645
akj_j 1646
prem_1011 1647
psn99 1648
bhavyapruthi 1649
himanshu280 1650
hvardhansingh 1651
kalpesh_goyal 1652
Pranshu54 1653
codef09 1654
vineethtatipatri19 1655
Ankit007 1656
code_b0t 1657
usrbom 1658
sarpit23 1659
kautilyakatiha10 1660
Apurv1998 1661
niket.khandelwal 1662
dex_10 1663
Dynamic_Dyanesh 1664
shubham201 1665
gaurav.iit.cse 1666
covalent_bond 1667
abhijeetiitv15 1668
neharao9 1669
Fragg3R 1670
ashishkr23438 1671
The_D 1672
mayankrungta7 1673
rishabhach 1674
swat_cf 1675
gms245 1676
kgahlot_0805 1677
dmohanty 1678
Malik99 1679
jackmiester777 1680
CodeBreaker_6 1681
Ravish 1682
ayushi_sneha 1683
himanshu-mittal 1684
bruce.wayne 1685
beowulf 1686
AMIT_SOLANKI 1687
_mtg98 1688
dracaris123 1689
pic_

rohith206 2143
ekanshi 2144
archishman916 2145
arpansarangi 2146
dhruv23899 2147
sauravgupta97 2148
vrs98 2149
skk2298 2150
AGB39 2151
kaushik_p9 2152
Pratap09 2153
Equinox 2154
scopeInfinity 2155
chan_iitp 2156
Gear4 2157
eviebot 2158
MJS1997 2159
Nightmare05 2160
FazleRahmanEjazi 2161
boreddy 2162
dev27 2163
hackr32 2164
nanobyte 2165
_deepak_ 2166
rg99 2167
NEXUS02 2168
avi_karnik 2169
Suga-R 2170
aks_12345 2171
ravik1 2172
AsHes 2173
Bleedpositive 2174
rk957019 2175
UNSEEN07 2176
SilverScar 2177
psycho999 2178
SageEx 2179
The_Wanderer 2180
Taeven 2181
intruder_p 2182
agathodaemon 2183
cc15 2184
grb.avatar 2185
potter1024 2186
coderHJ 2187
dhanush_sr 2188
srv_twry 2189
bhatchandan99 2190
sahil99 2191
yash_palriwal 2192
4ced_koding 2193
Gaur..Av 2194
koshyari 2195
mnjkmr398 2196
prvn123 2197
B_Ganesh_Reddy 2198
NEXUS01 2199
kushal10 2200
aryadas98 2201
dexty_007 2202
AkJn 2203
skandgupta02 2204
_sinus 2205
pi.codemonk 2206
ashutoshmishra 2207
love_san 2208
silencecoder 2209
naveen199

newcoderin 2673
namarnath1996 2674
shiv4s 2675
dracarys98 2676
HeadHunteR779 2677
wallcrawler 2678
kumar_abd 2679
jainsee24 2680
darshil_babel 2681
vaibhavdalmia 2682
nimxor 2683
Aviator96 2684
nukul430 2685
ishangoel9853 2686
IIT_king 2687
vikky 2688
saurabh1900 2689
krishna_mahesh 2690
aditya1701 2691
heello 2692
kaspers 2693
sdnr1 2694
sanyamg123 2695
nishant_coder 2696
shubham1694 2697
shivamd 2698
dreamplay 2699
coolbun 2700
amitrc17 2701
rishabh7699 2702
niti94 2703
sameer47 2704
shubhamgarg1 2705
nakli 2706
Arstine 2707
himanshugupta 2708
gone_girl 2709
Mithrandir_14 2710
sanchit_h 2711
shamitlal_14 2712
sirjan13 2713
subhamdtu 2714
osank 2715
deepankarak 2716
Shreymanik 2717
nishant0208 2718
aman1893 2719
T_BONE_289 2720
cmon_son 2721
hereicomewf 2722
brucewayne123 2723
incarnation 2724
rohanrko96.6 2725
canoodle 2726
abhishekk781 2727
arshu 2728
shreyas14 2729
ishu_05 2730
ayushsinha1996 2731
ka4tik 2732
ujjwal_goel 2733
vabs 2734
vteotia9 2735
cptcharisma 2736
kevin_212 2737


harshaga97 3198
sravan1022 3199
nivi_yo 3200
chakri1804 3201
Aashish1777 3202
codeme0720 3203
msnitiz 3204
thanos_03 3205
mango123 3206
shiva_r31 3207
rivudas 3208
deathsurgeon 3209
m17 3210
aditya_kakarot 3211
lucifer2709 3212
pankaj_cdr 3213
dragonslayerx 3214
gohan95 3215
satylogin 3216
greedy_me 3217
Divanshu 3218
vampire_slayer 3219
alooochaat1998 3220
sarkysaurabh 3221
hagu 3222
brainstorm 3223
vaibhav138 3224
amanshri93 3225
conor 3226
rishabhtwr07 3227
san_1512 3228
hybrid 3229
asvcracker007 3230
abhishekg 3231
aditya_3494 3232
ankur014 3233
mad.exe 3234
aman181993 3235
himalay 3236
shivamg_isc 3237
aditya1995 3238
sophisticated1 3239
i_coder 3240
ksh15 3241
meashish 3242
mrunique.01 3243
undercover1995 3244
uditiiita 3245
pranjuldb 3246
bitchplz 3247
dumb_ape 3248
neeraj1618 3249
shivshnkr 3250
tejavojjala 3251
vaibhavatul47 3252
ishabhgupta 3253
hasher12 3254
dbest077 3255
dikshant001 3256
yacoder 3257
raj29 3258
aconologia 3259
shailendra0792 3260
venky5556 3261
amankohli199

atibiti12345 3720
kshitij_07 3721
aayushsinghal 3722
math2do 3723
Goku743209 3724
rishabhdeepsingh98 3725
vipinkaushal 3726
Maggu 3727
xtinct 3728
teena.miki 3729
ShubhamP197 3730
saiaravindbv 3731
BluSky 3732
AKGP 3733
dhirajfx3 3734
lagang 3735
ag970221 3736
knight_coder 3737
dynamite_iit 3738
kratossiddhant 3739
shado_w 3740
devACE 3741
SJ99 3742
Phoenix-e-Re_en 3743
9151639599 3744
stark_boy 3745
vinayk 3746
shu22bham 3747
sp557 3748
rohankag7 3749
sujeetkr 3750
kaymas 3751
ankitraj 3752
kartikey_21 3753
faizan_khan 3754
ankit.p.s 3755
Enigma_28 3756
BAJUKA 3757
anmol.bhandari.eee13 3758
chhavij 3759
s0nskar 3760
gaurav6225 3761
codgeek 3762
--Albus_Severes_Potter-- 3763
001anish 3764
Harshit_iit 3765
amber0820 3766
walterwhite091 3767
curiouscoder_cc 3768
parvjain.cd.cse17 3769
coolkps 3770
pandakkk 3771
redPandakkk 3772
jainvandit15 3773
TheFaker 3774
avinashk 3775
TricKsteR221098 3776
m00nl1ghT 3777
mukeshpurbey 3778
skc12340 3779
panda_man 3780
karan_veer 3781
confusecoder 3782

ismvarunsharma 4235
nitish_ism 4236
basrawala19 4237
jainadarsh 4238
gabish01 4239
hbnker31 4240
dzekoo 4241
aniket20 4242
nmalviya52 4243
bharathg 4244
manjunath1996 4245
straw_hat 4246
begin_hs 4247
vinod10 4248
shishirt22 4249
vinish_jail97 4250
neeladree 4251
ss_nitian 4252
topcoder_me 4253
banarun 4254
visleck 4255
kvsk 4256
sumit99531 4257
furious__ 4258
rp789 4259
nitish712 4260
Baadshah_ 4261
prasad5596 4262
ayushcool2654 4263
kepler 4264
donkarnage 4265
rishigurjar52 4266
PKC 4267
himkha_100 4268
utkarsh111 4269
anichavan20 4270
kalpitshah 4271
hitesh_001 4272
harishreddy2608 4273
saumyajit_dey 4274
soubhik_m 4275
chandanreddy 4276
jainatishay71 4277
sg1993 4278
indianauthority 4279
ak_718 4280
nayak20 4281
Raveeghur 4282
rohspeed 4283
mathmaniac 4284
PeterGriffin 4285
dsharma080 4286
Ayush_Gupta24 4287
ankit_btech 4288
sauravdahiya2010 4289
abhinash2k16 4290
VibhorDodeja 4291
kautsiitd 4292
happy2332 4293
kuldeepsharma1312 4294
kanishgarg 4295
parthdhar 4296
Udit_Jain_iitd 42

kharita 4759
mastcoder26 4760
arch_45 4761
aayush1771 4762
ekkagra 4763
souravirus 4764
saanjh 4765
mak26 4766
narendraj9 4767
abhgangwar 4768
kingkoprbha 4769
Tarik 4770
bansal95 4771
jaykay12 4772
aryankhandal0 4773
aarsh01rsh 4774
coolanmol 4775
muskan75 4776
Dark_ArMy 4777
sam73 4778
indotech 4779
Pankake 4780
shivansh007 4781
vipin1998 4782
PP98 4783
kamlesh_cf 4784
e_coder 4785
ironstark 4786
anurag21raghav 4787
av.driftking 4788
premkamal 4789
dynamic_coder 4790
pipipzz 4791
Animax13 4792
jalaj 4793
codeshaker 4794
abhisheklfs 4795
nuke007 4796
marvel308 4797
bunnymund 4798
_saty_ 4799
nyble 4800
anudeex_cr7 4801
x_calibre 4802
rahuldps 4803
daringcool 4804
subham.amrit 4805
vaibhav95 4806
samridhi 4807
baver 4808
avinash.42 4809
rishabh1403 4810
decoder304 4811
VC9 4812
maniac_01 4813
regex1 4814
kdarknight 4815
adityaarun1 4816
naivcit19 4817
amora 4818
shbhmpndy 4819
riyasnh 4820
piyushchauhan2011 4821
wuxifinger 4822
surfer007 4823
dheerajkp_10 4824
riteish 4825
debalina.roy

Vedhachala 5285
shank_punk 5286
nandu6177 5287
aswinashok44 5288
codeatamrita 5289
vector96 5290
mukeshchugani10 5291
sachithg 5292
Skr379 5293
rsunny 5294
qhead 5295
jayadev.haddadi 5296
christy8317 5297
star_gazer 5298
Saiphani724 5299
betadash 5300
_greymatter 5301
cpragadeesh 5302
krishnannakul 5303
Yashwanth 5304
shinigami_pro 5305
Akhil_me 5306
mr.minion 5307
Dexter420 5308
yash310397 5309
ramprasadh 5310
Nikhil_Mahendran 5311
srinath263 5312
manoj1729 5313
arjun.prs 5314
vathzsri 5315
Gowtham_Venkat 5316
mathi_msdian 5317
gradhakrishnan 5318
s_a_k 5319
progak 5320
vinoth03 5321
vivoxie 5322
vasanthaganesh 5323
maj0rt0m 5324
Haresh_V 5325
Gowtham96 5326
_santyssk_ 5327
satya1612 5328
iheartT 5329
UTpH 5330
tssurya 5331
CodeCracker123 5332
sreejith 5333
briyani 5334
hpa16 5335
Sreenu_97 5336
akarthic16 5337
adithya2000 5338
Ram_Newton 5339
darshanshah 5340
Goutham_VG 5341
drunkskunk97 5342
ramnithin 5343
mohamed.mahmoud 5344
apoorv_01 5345
Wizard_31 5346
_Agent13_ 5347
Viswanath04

rishwanth 5804
thrashing 5805
vigneshjayavel 5806
mohan1005k 5807
imback 5808
abilash94 5809
lakshmi8 5810
jayasurya006 5811
thehalberdier 5812
KimiRaikko 5813
krishna95 5814
sankar96 5815
zetabyte 5816
guru_pirasad1 5817
Arceus 5818
uvsankar 5819
sakthi60195 5820
eaugene 5821
nithinsanjey 5822
saiprasanna94 5823
Tokustar 5824
he_he 5825
oculto 5826
hatim009 5827
rishabhjain1996 5828
yatin 5829
wadhwasahil 5830
_benson 5831
deebhatia 5832
suhailkhan408.sk 5833
har_vi 5834
magician_ 5835
amish_gupta 5836
gupta_rishabh 5837
eka1cha25414 5838
md.mehrab 5839
g_one 5840
imgeek 5841
saifryzwi 5842
shroudIsBack 5843
ahsankamal 5844
faheel 5845
amir29 5846
RogerFederer9 5847
prakash11 5848
Arshilgenius 5849
wajid 5850
harshit15 5851
ryuga222 5852
spaul100 5853
ashwin9686 5854
forgotter 5855
mriganga_10 5856
spectrums 5857
_revive_k_ 5858
kunal05 5859
hardLuck 5860
sandy_033 5861
_eXe_ 5862
thesis_007 5863
darpanjbora 5864
Sannidhi09 5865
shivamneeraj23 5866
Loftier 5867
bornon13oct 5868
GAURAV

_goku_ 6322
POISION 6323
uttu316 6324
kripa_jha007 6325
Abhishek_satyam 6326
Man_of_Silence 6327
yogesh_ 6328
manishtolani12 6329
devil202 6330
gourab19964u 6331
sameer_hack 6332
shahanwaz 6333
code_karma 6334
RiPBeat 6335
naimish_singh 6336
gautamk 6337
drjaat 6338
hannukamania 6339
himan_kash_27 6340
subham_pasari 6341
sanya_8 6342
alok_kumar1121511 6343
niksjain1 6344
seal_7 6345
himanshu802 6346
ShivamK 6347
striver_79 6348
aniket0212 6349
asipu_pawan 6350
Subash23 6351
rb2041 6352
theabhijitak11 6353
sayani_02 6354
imranasuman 6355
atanuc13 6356
isocyanide7 6357
rupesh_rao 6358
sudiptoJGEC 6359
aman0309 6360
utkarshpandey6 6361
Iamkausar 6362
ashish_sharmax 6363
richik 6364
mukul166 6365
princeshivam162 6366
diya0999 6367
gunjan10 6368
ankita142000 6369
abhisekssp4025 6370
Biswas8927 6371
guptaishwar1998 6372
animeshdey765 6373
Is97 6374
avi892nash 6375
p_unit 6376
arjun8115 6377
aman9598 6378
abhishek20113 6379
ayushmanbhava 6380
krpiyush5 6381
aman_singh 6382
amansonkr 6383
atul

In [33]:
print(userCount) # Number of users across all filtered organizations, as counted by the contest-count loop

6481


In [34]:
# Storing the final list of users: the organization -> users mapping is written out as JSON

with open('./data/user_filtered.json', 'w') as out_file:
    json.dump(filteredList, out_file)