In [2]:
import requests
import json
import pandas as pd
import tqdm
import time

In [3]:
def lastfm_get(payload):
    # define headers and URL
    headers = {'user-agent': 'EDU_PROJECT'}      # здесь впишите что-нибудь другое, если начнут банить аккаунты - 
                                                 # могут всех скопом с таким user-agent'ом подбанить :)
    url = 'https://ws.audioscrobbler.com/2.0/'

    # Add API key and format to the payload
    payload['api_key'] = 'api_key' # подставить свой апи ключ
    payload['format'] = 'json'

    response = requests.get(url, headers=headers, params=payload)
    return response

In [4]:
def get_core_users_list(user):
    core_users = []
    # получим всех друзей 1 юзера и используем их в качестве основы
    req = lastfm_get({
        'user': user,
        'method': 'user.getFriends',
        'limit': 200
    })
    for i in range(len(req.json()['friends']['user'])):
        try:
            core_users.append(req.json()['friends']['user'][i]['name'])
        except KeyError:
            continue
    return core_users

In [230]:
# протестим работоспособность. найдем друзей юзера
core = get_core_users_list('joanofarctan')
core

['pellitero',
 'MTEver',
 'ben-xo',
 'jarvincolina',
 'arrowsparrow',
 'red-green-blue',
 'sound-and-music',
 'gekas',
 'play6ox',
 'HelloImFrench-',
 'etyo',
 'patrickfarmer',
 'eva',
 'gurvansh',
 'RitmoLondon',
 'SohailBhatia',
 'skr',
 'E1i45',
 'wannesdeprez',
 'DISK-CTM',
 'juliuzseizure',
 'Edouard',
 'AFFIN',
 'mknx',
 'jwheare',
 'afonsoduarte',
 'gorakh',
 'Orlenay',
 'maxi',
 'schlagschnitzel',
 'alexanderwendt',
 'alcxxk',
 'michalboo',
 'underpangs',
 'des_encanto',
 'Dinos24',
 'fabiodebe',
 'Schrollum',
 'bashford',
 'jack_faith',
 'nananaina',
 'naniel',
 'Daniel1986',
 'pablosolana',
 'MiserableLie',
 'nimtree',
 'musicmobs',
 'LQDAudio',
 'Vascolar',
 'dunk',
 'jensnikolaus',
 'isentropy',
 'dcmserra',
 'symphonicknot',
 'car0lus',
 'Efe75',
 'd-a-i-n',
 'james_knight',
 'Keefj',
 'holyroarrecords',
 'arrdis',
 'RUPERT',
 'daveknapik',
 'dickykhota',
 'theneonfever',
 'spietsch',
 'whydelila',
 'barnacleboy',
 'fuckcity',
 'Hakuin96',
 'mxcl',
 'douglasdb',
 'juliasve

In [5]:
def parse_friends_for_core_users(core_list):
    user_list = []
    for user in core_list:
        try:
            if user not in user_list:
                temp_list = get_core_users_list(user)
            else:
                continue
            user_list += temp_list
            user_list = list(set(user_list))
            print('current user count:',len(user_list))
        except Exception:
            continue
        if len(user_list) > 10000:
            break
    return user_list

In [6]:
def get_user_tastes(user_name):
    cols = ['name', 'rank', 'mbid', 'artist', 'song_name', 'duration', 'count']
    df = pd.DataFrame(columns=cols)
    # используем 2 метода 
    t_info  = lastfm_get({
    'user': user_name,
    'method': 'user.getTopTracks',
    'limit': 100
    })
    if t_info.status_code != 200:
        return 0
    for i in range(len(t_info.json()['toptracks']['track'])):
        try:
            name = user_name
            rank = int(t_info.json()['toptracks']['track'][i]['@attr']['rank'])
            mbid = t_info.json()['toptracks']['track'][i]['mbid']
            artist = t_info.json()['toptracks']['track'][i]['artist']['name']
            song_name = t_info.json()['toptracks']['track'][i]['name']
            duration = int(t_info.json()['toptracks']['track'][i]['duration'])
            count = int(t_info.json()['toptracks']['track'][i]['playcount'])
            
            df.loc[len(df)] = [name, rank, mbid, artist, song_name, duration, count]
            
        except Exception:
            continue
    return df

In [7]:
def get_user_info(user_name):
    u_info  = lastfm_get({
    'user': user_name,
    'method': 'user.getInfo'
    })
    if u_info.status_code != 200:
        if u_info.json()['error'] == 6:
            return 6
    try:
        login = user_name
        name = u_info.json()['user']['realname']
        country = u_info.json()['user']['country']
        age = u_info.json()['user']['age']
        gender = u_info.json()['user']['gender']
        playcount = u_info.json()['user']['playcount']
        subscriber = int(u_info.json()['user']['subscriber'])
        registered = int(u_info.json()['user']['registered']['unixtime'])
            
        row = [login, name, country, age, gender, playcount, subscriber, registered]
            
    except Exception:
        return 1
    return row

In [8]:
def get_stat_by_list(user_list):
    ucols = ['login', 'name', 'country', 'age', 'gender', 'playcount', 'subscriber', 'registered']
    userdf = pd.DataFrame(columns=ucols)
    icols = ['name', 'rank', 'mbid', 'artist', 'song_name', 'duration', 'count']
    tastedf = pd.DataFrame(columns=icols)
    
    for user_name in tqdm.tqdm(user_list):
     #   time.sleep(2)
        try:
            uinfo = get_user_info(user_name)
            if type(uinfo) is list:
                userdf.loc[len(userdf)] = uinfo
            elif uinfo == 0:
                print('Error occured :(')
                return userdf, tastedf
            elif uinfo == 6:
                continue
            tdf = get_user_tastes(user_name)
            if type(tdf) is int:
                print('Error occured :(')
                return userdf, tastedf
            else:
                tastedf = tastedf.append(tdf, ignore_index=True)
        except Exception:
            continue
    return userdf, tastedf

### Код начинается здесь

In [9]:
file = open("user_list.txt", "r")
mlist = file.read()
user_list = mlist.split("\n")
file.close()

#### Менять нужно только параметры диапазона [100:120] - предлагаю распределить с [5000:8000] - Костя, [8000:] - Дима. 
#### Рекомендую начать с небольших диапазонов. Если выполнение валится на каком-то значении, рекомендую его скипнуть.

In [34]:
infodf, tastedf = get_stat_by_list(out) # здесь подставляйте в диапазон свои значения

infodf.to_csv('user_info.csv', mode='a', index=False)
tastedf.to_csv('taste_info.csv', mode='a', index=False)


100%|██████████████████████████████████████████████████████████████████████████████| 1198/1198 [38:16<00:00,  1.92s/it]


In [47]:
taste = pd.read_csv('taste_info.csv')
taste

Unnamed: 0,name,rank,mbid,artist,song_name,duration,count
0,AccountNumber17,1,0ff8718d-d9f6-4666-9133-b34d933bb45c,Arctic Monkeys,Fluorescent Adolescent,172,209
1,AccountNumber17,2,001b9408-4de0-4deb-aaf1-b74500e0ff5b,BTS,Butter,164,206
2,AccountNumber17,3,00d0985d-278e-4a72-b959-fc184a1ce990,Arctic Monkeys,505,246,173
3,AccountNumber17,4,22b8105b-7667-33b6-b59a-538ba40e5606,Arctic Monkeys,Only Ones Who Know,182,163
4,AccountNumber17,5,,BTS,Boy With Luv (feat. Halsey),229,156
...,...,...,...,...,...,...,...
475264,Macthehat,96,093b1877-48cf-395c-a6f0-48941369a6ec,Super Furry Animals,Play It Cool,195,49
475265,Macthehat,97,09d442a0-0a0c-4ea8-9d45-41594d20d9c6,The Wedding Present,Kennedy,0,49
475266,Macthehat,98,83ce0f1f-9999-379b-9e1b-45ab37042129,its a buffalo,Somewhere In Range,216,48
475267,Macthehat,99,e5da2d95-1e0e-4636-8412-66e39de337d1,Andy Roberts,Queen of the Moonlight World,289,47


In [43]:
users = pd.read_csv('user_info.csv')
users

Unnamed: 0,login,name,country,age,gender,playcount,subscriber,registered
0,AccountNumber17,Amanda,Brazil,0,n,70009,0,1242686965
1,AccountNumber17,Amanda,Brazil,0,n,70009,0,1242686965
2,AccountNumber17,Amanda,Brazil,0,n,70088,0,1242686965
3,AceCrikey,Edd Smith,United Kingdom,0,n,22757,0,1206918728
4,AcidFeelings,,Germany,0,n,38293,0,1362166005
...,...,...,...,...,...,...,...,...
4995,LSacchini,,Italy,0,n,17863,0,1430509555
4996,NotNowOK,Lynne,United States,0,n,27836,0,1186319023
4997,MrFeles,Felix,Netherlands,0,n,79346,0,1292454736
4998,MojMilane,,,0,n,5388,0,1229527020
