Imports

In [2]:
#Omid55
import requests
import pandas as pd
import pickle as pk
import csv
import time
import numpy as np
import random

import seaborn as sns
import matplotlib.pylab as plt
% matplotlib inline

In [3]:
URL = {
    'BASE': 'https://{proxy}.api.pvp.net/api/lol/{region}/{rest}',
    'STATIC_BASE': 'https://global.api.pvp.net/api/lol/static-data/{region}/v1.2/{rest}',
    
    'MATCHLIST_URL': 'v2.2/matchlist/by-summoner/{summonerId}?seasons=SEASON{season}',
    'MATCH_URL': 'v2.2/match/{matchId}',
    'CHAMPION_URL': 'champion/{id}?champData=all',
    'SUMMONER_URL': 'v1.4/summoner/{summonerId}',
    #'SUMMONER_WINS_LOSSES_URL': 'v2.5/league/by-summoner/{summonerId}'  # NOT TRUE (VERY LARGE NUMBERS)
    'SUMMONER_WINS_LOSSES_PER_CHAMPION_URL': 'v1.3/stats/by-summoner/{summonerId}/ranked?season=SEASON{season}'
    
    # /api/lol/{region}/v1.3/stats/by-summoner/{summonerId}/ranked: this is for getting the experience 
    #  of player (summonerId) with different champions and also ALL EXPERIENCE one person has
    
    # /api/lol/{region}/v1.3/game/by-summoner/{summonerId}/recent: games that one summoner plays 
    #  with other people
}

REGIONS = {
    'north america': 'na',
    'europe west': 'euw'
}

In [4]:
class Match(object):
    
    def __init__(self):
        self.winners = []
        self.losers = []
        self.duration = -1
        

In [5]:
class RiotAPI(object):
    
    def __init__(self, api_key, region=REGIONS['north america']):
        self.api_key = api_key
        self.region = region
        self.champions = {}
#         self.champions_allinfo = {}
#         self.champions_allinfo_saved = False
        self.summoner_wins_losses = {}
        
    def _request(self, base, rest, params={}):
        args = {'api_key': self.api_key}
        args.update(params)
        response = requests.get(
            URL[base].format(
                rest=rest,
                proxy=self.region,
                region=self.region,
            ),
            params=args
        )
        #print(response.request.url)
        time.sleep(1.2)
        return response.json()
    
    def _base_request(self, rest, params={}):
        return self._request('BASE', rest, params)
        
    def _static_request(self, rest, params={}):
        return self._request('STATIC_BASE', rest, params)
    
    # functions
    def get_summoner_level(self, sid):
        rest = URL['SUMMONER_URL'].format(
            summonerId=sid
        )
        return self._base_request(rest)
    
    def _get_list_of_match_ids(self, sid, season):
        rest = URL['MATCHLIST_URL'].format(
            summonerId=sid,
            season=season
        )
        result = self._base_request(rest)
        if 'matches' in result:
            for match in result['matches']:
                yield match['matchId']
                
    def _get_match(self, mid):
        rest = URL['MATCH_URL'].format(
            matchId=mid
        )
        result = self._base_request(rest)
        return result
        
    def get_champion_all_info(self, championId):
        rest = URL['CHAMPION_URL'].format(
            id=championId
        )
        return self._static_request(rest)
    
#     def get_champion_index(self, championId):
#         if championId not in self.champions:
#             myid = len(self.champions)
#             self.champions_allinfo[myid] = self.get_champion_all_info(championId)
#             if not self.champions_allinfo_saved and len(self.champions_allinfo) == 132:
#                 with open('DATAChampionsAllInfo.pkl', 'wb') as output:
#                     pk.dump(self.champions_allinfo, output)
#                     self.champions_allinfo_saved = True
#             self.champions[championId] = myid
#         return self.champions[championId]
    
    def get_matches_champions_and_summonerIds_before_game(self, season, just_Ids=True):
        #for sid in range(1,1000000):    #for sid in range(1000000,5000000):
        with open('summonerId_list.pkl', 'rb') as f:
            all_summoners = pk.load(f)
        summoners = [ all_summoners[i] for i in sorted(random.sample(range(len(all_summoners)), 3000)) ]
        for sid in summoners:
            matchids = self._get_list_of_match_ids(sid, season)
            for matchid in matchids:
                match = self._get_match(matchid)
                if 'participants' in match:
                    losers = []
                    winners = []
                    winners_sid = []
                    losers_sid = []
                    
                    for member in match['participants']:
                        suId = [pi['player']['summonerId'] for pi in match['participantIdentities'] if pi['participantId']==member['participantId']]#[0]
                        if member['stats']['winner']:
                            winners_sid += suId
                            
                            if just_Ids:
                                winners += [member['championId']]
                            else:
                                winners += (self.get_champion_capabilities(member['championId']))
                        else:
                            losers_sid += suId
                            
                            if just_Ids:
                                losers += [member['championId']]
                            else:
                                losers += (self.get_champion_capabilities(member['championId']))
                    data = {'matchId': match['matchId'], 'duration': match['matchDuration'], 'champions': winners + losers, 'summoners': winners_sid + losers_sid}
                    yield data
                    
    # --------------------------------------------------------------------------------------
    def get_summoner_wins_losses(self, sid, season):
        key = str(sid)+','+str(season)
        if key not in self.summoner_wins_losses:
            res = {}
            rest = URL['SUMMONER_WINS_LOSSES_PER_CHAMPION_URL'].format(
                summonerId=sid,
                season=season
            )
            result = self._base_request(rest)
            if 'champions' in result:
                for ch in result['champions']:
                    if ch['id']:
                        res[ch['id']] = [ch['stats']['totalSessionsWon'], ch['stats']['totalSessionsLost']]
            self.summoner_wins_losses[key] = res
        return self.summoner_wins_losses[key]
        
    def get_win_stats(self, team_summoners, team_champs, season):
        game = []
        win_ratio = []
        same_champ_game = []
        same_champ_win_ratio = []
        for i in range(5):
            ch_history = self.get_summoner_wins_losses(team_summoners[i], season=season)
            if ch_history:
                if team_champs[i] in ch_history:
                    a = ch_history[team_champs[i]]
                    same_champ_game.append(a[0] + a[1])
                    same_champ_win_ratio.append(a[0]/float(a[0] + a[1]))
                a = np.sum(list(ch_history.values()), axis=0)
                game.append(a[0] + a[1])
                win_ratio.append(a[0]/float(a[0] + a[1]))
        if len(game) < 4:
            return []
        else:
            return [
                np.average(game),
                np.std(game),
                np.average(win_ratio),
                np.std(win_ratio),
                np.average(same_champ_game),
                np.std(same_champ_game),
                np.average(same_champ_win_ratio),
                np.std(same_champ_win_ratio)
                ]
    # --------------------------------------------------------------------------------------
    
    def get_matches_champions_and_summonerIds_before_game_from_given_set_of_summoners(self, season, given_summoners, just_Ids=True):
        for sid in given_summoners:
            matchids = self._get_list_of_match_ids(sid, season)
            for matchid in matchids:
                match = self._get_match(matchid)
                if 'participants' in match:
                    losers = []
                    winners = []
                    winners_sid = []
                    losers_sid = []

                    for member in match['participants']:
                        suId = [pi['player']['summonerId'] for pi in match['participantIdentities'] if pi['participantId']==member['participantId']]#[0]
                        if suId not in given_summoners:
                            nodata=True
                            break
                        if member['stats']['winner']:
                            winners_sid += suId

                            if just_Ids:
                                winners += [member['championId']]
                            else:
                                winners += (self.get_champion_capabilities(member['championId']))
                        else:
                            losers_sid += suId

                            if just_Ids:
                                losers += [member['championId']]
                            else:
                                losers += (self.get_champion_capabilities(member['championId']))
                    if nodata:
                        continue
                    data = {'matchId': match['matchId'], 'duration': match['matchDuration'], 'champions': winners + losers, 'summoners': winners_sid + losers_sid}
                    yield data
                  
                    
with open('../MyKey2.key', 'r') as key_file:
    KEY = key_file.readline().strip()
api = RiotAPI(KEY)

# Run the script

In [6]:
LIMIT = 11000
dt = api.get_matches_champions_and_summonerIds_before_game(season=2016)

data = []
for d in dt:
    data.append(d)
    if not len(data) % 1000:
        print(len(data))
    if len(data) > LIMIT:
        break
        
# with open('DATAChampionIndices.csv', 'w') as fp:
#     a = csv.writer(fp, delimiter=',')
#     a.writerows(champions_data)
# with open('DATASummonerIds.csv', 'w') as fp:
#     a = csv.writer(fp, delimiter=',')
#     a.writerows(summoners_data)

1000
2000
3000
4000
5000
6000
7000
8000


KeyboardInterrupt: 

In [7]:
print(len(data), 'samples are saving...')
with open('LargeDataChampionProficiencyPlaynet.pkl', 'wb') as dfile:
    pk.dump(data, dfile)
print('Done.')

8586 samples are saving...
Done.


In [7]:
# if less than 8 people are known we can remove them

In [8]:
# to_be_deleted = []
# for i, d in enumerate(dat):
#     c = 0
#     for sid in d['summoners']:
#         if api.get_summoner_wins_losses(sid, 2015):
#             c += 1
#     if c < 8:
#         to_be_deleted.append(i) 

Just to find the maximum number of play by one of the summoners

In [245]:
c = 0
maxxc = 0
maxx = 0
for d in data[:100]:
    vs = []
    for sid in d['summoners']:
        l = list(api.get_summoner_wins_losses(sid, 2015).values())
        if l:
            vs.append(sum(np.sum(l,axis=0)))
    v = np.average(vs)
    if v > maxx:
        maxx = v
        maxxc = c
    c += 1
print(maxxc, maxx)

28 658.375


In [246]:
vs = []
for sid in data[maxxc]['summoners']:
    l = list(api.get_summoner_wins_losses(sid, 2015).values())
    if l:
        print(sid, np.sum(l))

23340536 1176
66923827 797
67829841 588
32567408 1481
22713405 577
27210448 359
61585441 243
5 46


In [247]:
data[maxxc]

{'champions': [44, 11, 1, 78, 21, 412, 58, 111, 103, 81],
 'duration': 1921,
 'matchId': 2185829507,
 'summoners': [23340536,
  66923827,
  67829841,
  32567408,
  22713405,
  27210448,
  61585441,
  70519987,
  5,
  59771492]}

In [8]:
dt = []
sampl_data = [data[i] for i in sorted(random.sample(range(len(data)), 1500))]
for d in sampl_data:
    if len(d['champions'])==10:
        winner = api.get_win_stats(d['summoners'][:5], d['champions'][:5], 2015)
        if not winner:
            continue
        loser = api.get_win_stats(d['summoners'][5:], d['champions'][5:], 2015)
        if not loser:
            continue
        dt.append(winner + loser)
        if len(dt) % 100 == 0:
            print(len(dt))
dataset = pd.DataFrame(data=dt, columns=['winner_avg_game_count', 'winner_std_game_count', 'winner_avg_win_ratio', 'winner_std_win_ratio', 'winner_avg_same_champion_game_count', 'winner_std_same_champion_game_count', 'winner_avg_same_champion_win_ratio', 'winner_std_same_champion_win_ratio', 'loser_avg_game_count', 'loser_std_game_count', 'loser_avg_win_ratio', 'loser_std_win_ratio', 'loser_avg_same_champion_game_count', 'loser_std_same_champion_game_count', 'loser_avg_same_champion_win_ratio', 'loser_std_same_champion_win_ratio'])

dataset.to_csv('Stat2015_for_Classification2016_Dataset.csv')



# Filtering summoners using history of games in 2015 and 2016 stats

In [294]:
from scipy.stats import pearsonr as corr

In [469]:
def plot_distribution_of_correlation(summoner_ids):
    r = []
    p = []
    for sid in summoner_ids:
        h1 = api.get_summoner_wins_losses(sid, 2015)
        h2 = api.get_summoner_wins_losses(sid, 2016)
        wr1 = []
        wr2 = []
        for k in list(set(h1.keys()) & set(h2.keys())):
            wr1.append(h1[k][0]/float(h1[k][0]+h1[k][1]))
            wr2.append(h2[k][0]/float(h2[k][0]+h2[k][1]))
    #         wr1.append(h1[k][0]+h1[k][1])
    #         wr2.append(h2[k][0]+h2[k][1])
        c = corr(wr1, wr2)
        r.append(c[0])
        p.append(c[1])
    plt.hist(np.array(r)[np.where(np.array(p)<0.05)])

Selecting summoners based on 80% of conflicting champion choice

In [423]:
with open('summonerId_list.pkl', 'rb') as dfile:
    summoners = pk.load(dfile)
good_sids = []
for sid in summoners:
    h1 = api.get_summoner_wins_losses(sid, 2015)
    h2 = api.get_summoner_wins_losses(sid, 2016)
    c1 = len(set(list(h1.keys()) + list(h1.keys())))
    if c1 !=0 and len(list(set(h1.keys()) & set(h2.keys()))) / c1 < 0.8:
        continue
    good_sids.append(sid)
print(len(good_sids))
plot_distribution_of_correlation(good_sids)

1245


Selecting summoners based on positive (larger than 0.3) of 2015 and 2016 win ratio

In [None]:
with open('summonerId_list.pkl', 'rb') as dfile:
    summoners = pk.load(dfile)
gggood_sids = []
for sid in summoners:
    h1 = api.get_summoner_wins_losses(sid, 2015)
    h2 = api.get_summoner_wins_losses(sid, 2016)
    wr1 = []
    wr2 = []
    for k in list(set(h1.keys()) & set(h2.keys())):
        wr1.append(h1[k][0]/float(h1[k][0]+h1[k][1]))
        wr2.append(h2[k][0]/float(h2[k][0]+h2[k][1]))
    if len(wr1)<3 or len(wr2)<3:
        continue
    c = corr(wr1, wr2)
    if c[1]<1 and c[0]>0.3:
        gggood_sids.append(sid)
print(len(gggood_sids))
plot_distribution_of_correlation(gggood_sids)

In [472]:
dt = api.get_matches_champions_and_summonerIds_before_game_from_given_set_of_summoners(2016, good_sids)
LIMIT = 1200
data = []
for d in dt:
    data.append(d)
    #if not len(data) % 1000:
    print(len(data))
    if len(data) > LIMIT:
        break
        

[25424259] was not there!
[537815] was not there!
[32030390] was not there!
[42162046] was not there!
[69982935] was not there!
[53439336] was not there!
[52817600] was not there!
[55476681] was not there!
[28751181] was not there!
[20359907] was not there!
[44613524] was not there!
[26133116] was not there!
[34270934] was not there!
[31741403] was not there!
[23340536] was not there!
[69043055] was not there!


KeyboardInterrupt: 

In [2]:
import pickle as pk

In [4]:
with open('summonerId_list.pkl', 'rb') as dfile:
    summoners = pk.load(dfile)
with open('summonerId_list_py2.pkl', 'wb') as f:
    pk.dump(summoners, f, protocol=2)