In [14]:
import requests
import time
import pandas as pd
import numpy as np
import re
import json
import pdb
from bs4 import BeautifulSoup
import pickle
import personal_keys
import math

# fav_funcs.py

## fav_request
Makes a request to the Ravelry API and returns the parsed JSON of one page of the user's favorites.

In [4]:
def fav_request(username, page_size, page):
    """Request one page of a user's favorites from the Ravelry API.

    Args:
        username: Ravelry username whose favorites are listed.
        page_size: Value sent as the ``page_size`` query parameter.
        page: Value sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = 'https://api.ravelry.com/people/{}/favorites/list.json'.format(username)
    credentials = (personal_keys.username(), personal_keys.password())
    query = {'page_size': page_size, 'page': page}
    response = requests.get(endpoint, auth=credentials, params=query)
    return response.json()

### example output:

In [5]:
fav_request("katec125", 2, 0).keys()

dict_keys(['favorites', 'paginator'])

## create_fav_list
input fav json, output list of fav pattern codes

In [6]:
def create_fav_list(fav_request):
    """Extract pattern codes from a favorites JSON payload.

    Args:
        fav_request: Dict with a ``'favorites'`` list; each entry has a
            ``'favorited'`` value that is either ``None`` or a dict.

    Returns:
        List of codes in payload order. For each favorited dict the
        ``'pattern_id'`` value is used when that key exists, otherwise the
        ``'id'`` value; entries with neither key, and ``None`` codes, are
        dropped.
    """
    codes = []
    for entry in fav_request['favorites']:
        target = entry['favorited']
        if target is None:
            continue
        if 'pattern_id' in target:
            code = target['pattern_id']
        elif 'id' in target:
            code = target['id']
        else:
            continue
        if code is not None:
            codes.append(code)
    return codes

### example output: 

In [11]:
fav_json = fav_request("katec125", 2, 0)
create_fav_list(fav_json)

[1017571, 777372]

## get_favs_list
* Input is username, output is list of pattern ids in a person's favorites
* Essentially combination of fav request and create_fav_list
* If the user has over 500 favorites, then only returns the first 500. This is done so other functions don't take forever

In [7]:
def get_favs_list(username):
    """Collect the pattern codes of a user's favorites.

    Pages of 100 favorites are requested starting at page 1, and at most
    five pages are read, so no more than the first 500 favorites are used.

    Args:
        username: Ravelry username.

    Returns:
        List of pattern codes gathered from the requested pages.
    """
    first_page = fav_request(username, 100, 1)
    codes = create_fav_list(first_page)
    paginator = first_page['paginator']
    if paginator['page_count'] > 1:
        # Never read past page 5, however many pages the user has.
        final_page = min(paginator['last_page'], 5)
        for page_number in range(2, final_page + 1):
            codes.extend(create_fav_list(fav_request(username, 100, page_number)))
    return codes

### Example output 

In [14]:
get_favs_list("katec125")[:10]

[1017571,
 777372,
 1002216,
 998422,
 877616,
 990541,
 966823,
 444597,
 888409,
 420814]

## fav_dict
* Returns a dict that is just pattern code and 1 for every value
* This is done so we can then do the proj rating func to create user data column

In [9]:
def fav_dict(username):
    """Map every favorite pattern code of ``username`` to the value 1.

    Args:
        username: Ravelry username.

    Returns:
        Dict whose keys are the codes from ``get_favs_list`` and whose
        values are all 1.
    """
    return dict.fromkeys(get_favs_list(username), 1)

In [21]:
fav_dict("katec125")[1017571]

1

# proj_funcs.py

## proj_json
Returns the parsed JSON response describing the user's projects.

In [10]:
def proj_json(username):
    """Request a user's project list from the Ravelry API.

    Args:
        username: Ravelry username.

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = 'https://api.ravelry.com/projects/{}/list.json'.format(username)
    credentials = (personal_keys.username(), personal_keys.password())
    response = requests.get(endpoint, auth=credentials)
    return response.json()

### Example output:

In [23]:
proj_json("katec125").keys()

dict_keys(['projects', 'collections', 'paginator'])

## project_list
Input is a json obtained in the proj_json function, output is a list of pattern ids for user's projects

In [11]:
def project_list(p_json):
    """Return the ``'pattern_id'`` of every project in a projects payload.

    Args:
        p_json: Dict with a ``'projects'`` list of project dicts.

    Returns:
        List of ``'pattern_id'`` values in payload order.
    """
    return [project['pattern_id'] for project in p_json['projects']]

### Example output:

In [25]:
p_json = proj_json("katec125")
project_list(p_json)[:10]

[669230, 118820, 490330, 103767, 91776, 216824, 990541, 582245, 921518, 497647]

## get_project_list_from_username

combination of the two previous functions


In [12]:
def get_project_list_from_username(username):
    """Fetch a user's projects and return their pattern ids.

    Args:
        username: Ravelry username.

    Returns:
        The result of ``project_list`` applied to ``proj_json(username)``.
    """
    return project_list(proj_json(username))

### Example output: 

In [27]:
get_project_list_from_username("katec125")[:10]

[669230, 118820, 490330, 103767, 91776, 216824, 990541, 582245, 921518, 497647]

## project_rating
* returns dictionary of project codes and user ratings, 
* default rating is 3 if none is given, 
* otherwise it's on a scale from 1-5 (hence the +1)

In [13]:
def project_rating(username):
    """Map each of a user's project pattern ids to a rating.

    The projects are fetched once for ``username``. The previous version
    sized its loop from a hard-coded account and re-requested the project
    list for every project.

    Args:
        username: Ravelry username.

    Returns:
        Dict of ``pattern_id`` -> rating. A project whose ``'rating'`` is
        ``None`` gets 3; otherwise the stored rating plus 1 is used. When
        several projects share a pattern id, the last one wins.
    """
    projects = proj_json(username)['projects']
    ratings = {}
    for project in projects:
        rating = project['rating']
        ratings[project['pattern_id']] = 3 if rating is None else rating + 1
    return ratings

Example output:

In [29]:
project_rating("katec125")

{669230: 3,
 118820: 3,
 490330: 3,
 103767: 4,
 91776: 3,
 216824: 3,
 990541: 3,
 582245: 3,
 921518: 3,
 497647: 3,
 828182: 3,
 833778: 3,
 807214: 3,
 497856: 3,
 617705: 3,
 722427: 3,
 714457: 3,
 676427: 3,
 226669: 3}

## user_data 
* creates dictionary where the keys are all user's favorites and projects
* all favorites get a 1 assigned
* projects get either their rating or 3 assigned
* the final dict is turned into a string so it can combine with the existing user profile df

In [35]:
def user_data(username):
    """Combine a user's favorites and project ratings into one dict.

    Favorites come from ``fav_dict`` and are then overwritten by the
    entries of ``project_rating`` wherever a code appears in both.

    Args:
        username: Ravelry username.

    Returns:
        Dict whose keys are the codes converted with ``str`` and whose
        values are the favorite marker or project rating.
    """
    combined = fav_dict(username)
    combined.update(project_rating(username))
    return {str(code): score for code, score in combined.items()}

Example output:

In [36]:
user_data("katec125")

{'1017571': 1,
 '777372': 1,
 '1002216': 1,
 '998422': 1,
 '877616': 1,
 '990541': 3,
 '966823': 1,
 '444597': 1,
 '888409': 1,
 '420814': 1,
 '790875': 1,
 '409979': 1,
 '538253': 1,
 '387775': 1,
 '994876': 1,
 '993218': 1,
 '975032': 1,
 '995685': 1,
 '731341': 1,
 '984587': 1,
 '384733': 1,
 '866947': 1,
 '932512': 1,
 '893880': 1,
 '986145': 1,
 '807636': 1,
 '936349': 1,
 '918043': 1,
 '907192': 1,
 '843846': 1,
 '866773': 1,
 '686988': 1,
 '966033': 1,
 '968150': 1,
 '869199': 1,
 '351173': 1,
 '824391': 1,
 '921518': 3,
 '843810': 1,
 '855203': 1,
 '925240': 1,
 '929139': 1,
 '570677': 1,
 '483117': 1,
 '639396': 1,
 '754362': 1,
 '824302': 1,
 '886241': 1,
 '561403': 1,
 '171113': 1,
 '57264': 1,
 '772466': 1,
 '497126': 1,
 '680396': 1,
 '868477': 1,
 '868478': 1,
 '497647': 3,
 '582257': 1,
 '377778': 1,
 '593997': 1,
 '838914': 1,
 '742300': 1,
 '858134': 1,
 '855568': 1,
 '763885': 1,
 '830861': 1,
 '853849': 1,
 '690209': 1,
 '843963': 1,
 '804284': 1,
 '853145': 1,
 '743

# friend_funcs.py
Note: I may not end up using these functions

Additionally, example outputs are not included here because I'm not here to put friend info out in a viewable notebook

## friend_username_list
The input is a username and the output is a list of the user's friends

In [32]:
def friend_username_list(username):
    """Return the usernames of a user's friends.

    Args:
        username: Ravelry username.

    Returns:
        List of the ``'friend_username'`` value of every entry in the
        response's ``'friendships'`` list.
    """
    user_url = 'https://api.ravelry.com/people/{}/friends/list.json'.format(username)
    response = requests.get(user_url,
                            auth=(personal_keys.username(), personal_keys.password()))
    # Decode the body once instead of re-parsing it for every friend.
    friendships = response.json()['friendships']
    return [friendship['friend_username'] for friendship in friendships]


## get_friend_favs
The input is a username, output is a list of the user's friends' favorites

In [None]:
def get_friend_favs(username):
    """Gather the favorite pattern codes of all of a user's friends.

    Args:
        username: Ravelry username whose friends are looked up.

    Returns:
        One flat list with every friend's favorites, in friend order,
        with ``None`` entries removed.
    """
    collected = []
    for friend in friend_username_list(username):
        collected.extend(get_favs_list(friend))
    return [code for code in collected if code is not None]

## get_friend_projs
The input is a username, output is a list of the user's friends' projects

In [33]:
def get_friend_projs(username):
    """Gather the project pattern ids of all of a user's friends.

    Args:
        username: Ravelry username whose friends are looked up.

    Returns:
        One flat list with every friend's project pattern ids, in friend
        order, with ``None`` entries removed.
    """
    all_friend_projs = []
    for friend in friend_username_list(username):
        # The notebook defines get_project_list_from_username; the name
        # get_project_list used here before is not defined in it.
        all_friend_projs.extend(get_project_list_from_username(friend))
    return [item for item in all_friend_projs if item is not None]

# search_functions.py 

## search
Input is a search term of some sort, output is the json file containing search results

In [15]:
def search(term):
    """Run a pattern search against the Ravelry API.

    The term is passed through ``params`` so that requests URL-encodes it.
    Pasting it into the URL by hand let characters such as ``&`` or ``#``
    in a term alter the query string.

    Args:
        term: Free-text search term.

    Returns:
        The decoded JSON body of the search response.
    """
    response = requests.get(
        'https://api.ravelry.com/patterns/search.json',
        params={'query': term},
        auth=(personal_keys.username(), personal_keys.password()))
    return response.json()

### Example output:

In [36]:
search("cowl").keys()

dict_keys(['patterns', 'paginator'])

## search_pattern_list
Input is a json of searched patterns (the output of the previous function), output is a list of pattern ids.

In [16]:
def search_pattern_list(s_json):
    """Return the ``'id'`` of every pattern in a search result payload.

    Args:
        s_json: Dict with a ``'patterns'`` list of pattern dicts.

    Returns:
        List of pattern ids in result order.
    """
    return [pattern['id'] for pattern in s_json['patterns']]


### Example output:

In [39]:
s_json = search("cowl")
search_pattern_list(s_json)[:10]

[819716,
 1024446,
 548384,
 1024387,
 1004479,
 713248,
 169260,
 942286,
 273024,
 1023884]

## search_to_list
This is just a combination of the two previous functions into one function. The input is a search term, output is a list of pattern ids


In [17]:
def search_to_list(term):
    """Search for ``term`` and return the ids of the patterns found.

    Args:
        term: Free-text search term handed to ``search``.

    Returns:
        List of pattern ids produced by ``search_pattern_list``.
    """
    return search_pattern_list(search(term))

### Example output:

In [41]:
search_to_list("cowl")[:10]

[819716,
 1024446,
 548384,
 1024387,
 1004479,
 713248,
 169260,
 942286,
 273024,
 1023884]

## or_string
This function turns list of terms into string separated by '%7C', which is 'or' in the ravelry search url.

In [3]:
def or_string(attr_list):
    """Join search terms with ``%7C``, the "or" separator of the search url.

    Args:
        attr_list: Iterable of strings.

    Returns:
        The terms joined by ``'%7C'``.
    """
    separator = '%7C'
    return separator.join(attr_list)

### Example output:

In [6]:
or_string(['dk', 'worsted', 'aran'])

'dk%7Cworsted%7Caran'

## fit_and_attr_split
Some of the pattern attributes are actually listed under 'fit' when creating the search url, so the pattern attributes need to be split into two lists. The first list is the list of attributes that would be searched under 'fit' and the other list is the attributes that would actually be listed under pattern attributes 

In [7]:
def fit_and_attr_split(attr_list):
    """Split pattern attributes into "fit" terms and all other terms.

    Args:
        attr_list: Iterable of attribute permalinks.

    Returns:
        ``[fit_list, attribute_list]``: the terms that belong to the fixed
        set of fit names, followed by the remaining terms. Input order is
        kept within each list.
    """
    fit_names = (
        'adult', 'baby', 'child', 'doll-size',
        'newborn-size', 'preemie', 'teen', 'toddler',
        'negative-ease', 'no-ease', 'positive-ease',
        'maternity', 'fitted', 'miniature', 'oversized',
        'petite', 'plus', 'tall', 'female', 'male', 'unisex',
    )
    # Bucket each term by whether it is a fit name, in a single pass.
    buckets = {True: [], False: []}
    for term in attr_list:
        buckets[term in fit_names].append(term)
    return [buckets[True], buckets[False]]

### Example output:

In [8]:
example_attr_list = ['female', 'adult', 'fitted', 'lace',
                'stripes', 'ripple', 'seamed', 'crew-neck',
                'elbow-sleeve', 'bottom-up', 'written-pattern',
                'worked-flat','stripes-colorwork']
fit_and_attr_split(example_attr_list)


[['female', 'adult', 'fitted'],
 ['lace',
  'stripes',
  'ripple',
  'seamed',
  'crew-neck',
  'elbow-sleeve',
  'bottom-up',
  'written-pattern',
  'worked-flat',
  'stripes-colorwork']]

## unique_search_url_section
This function creates the string that is the unique part of the search url. In other words, this is the section that tells the search function what to search for.

In [9]:
def unique_search_url_section(attr_dict):
    """Build the query-string section that describes what to search for.

    Args:
        attr_dict: Dict with ``'yarn_weight'``, ``'pattern_attributes'``
            and ``'pattern_categories'`` entries.

    Returns:
        A string of the form ``weight=..&pa=..&pc=..&fit=..`` where every
        value is an ``or_string`` of the matching terms. The first pattern
        category is left out of ``pc``.
    """
    weights = create_yarn_list(attr_dict['yarn_weight'])
    fit_terms, attribute_terms = fit_and_attr_split(attr_dict['pattern_attributes'])
    weight_part = or_string(weights)
    attribute_part = or_string(attribute_terms)
    category_part = or_string(attr_dict['pattern_categories'][1:])
    fit_part = or_string(fit_terms)
    return 'weight={}&pa={}&pc={}&fit={}'.format(
        weight_part, attribute_part, category_part, fit_part)


### Example output:

In [16]:
search_example_attr_dict = {'yarn_weight': 'Fingering',
 'pattern_attributes': ['female', 'adult', 'fitted', 'lace',
                        'stripes', 'ripple', 'seamed',
                        'crew-neck', 'elbow-sleeve',
                        'bottom-up','written-pattern',          
                        'worked-flat', 'stripes-colorwork'],
 'pattern_categories': ['clothing', 'sweater', 'pullover']}

unique_search_url_section(search_example_attr_dict)

'weight=fingering%7Clight-fingering%7Csport&pa=lace%7Cstripes%7Cripple%7Cseamed%7Ccrew-neck%7Celbow-sleeve%7Cbottom-up%7Cwritten-pattern%7Cworked-flat%7Cstripes-colorwork&pc=sweater%7Cpullover&fit=female%7Cadult%7Cfitted'

## full_search_url
This function takes the unique url section created in the previous section and adds that to the rest of the url needed to make a search query

In [18]:
def full_search_url(url_sect):
    """Wrap a unique search section into a complete search url.

    Args:
        url_sect: Query-string section describing the search.

    Returns:
        The pattern search url with ``url_sect`` inserted ahead of the
        fixed sort and view parameters.
    """
    template = ('https://api.ravelry.com/patterns/search.json'
                '?{}&sort=recently-popular&view=captioned_thumbs')
    return template.format(url_sect)


### Example output

In [19]:
url_sect = unique_search_url_section(search_example_attr_dict)
full_search_url(url_sect)

'https://api.ravelry.com/patterns/search.json?weight=fingering%7Clight-fingering%7Csport&pa=lace%7Cstripes%7Cripple%7Cseamed%7Ccrew-neck%7Celbow-sleeve%7Cbottom-up%7Cwritten-pattern%7Cworked-flat%7Cstripes-colorwork&pc=sweater%7Cpullover&fit=female%7Cadult%7Cfitted&sort=recently-popular&view=captioned_thumbs'

## create_search_url

A combination of the two previous functions. It takes in a attribute dictionary and the output is a full search url.

In [20]:
def create_search_url(attr_dict):
    """Turn an attribute dictionary into a full search url.

    Args:
        attr_dict: Attribute dictionary accepted by
            ``unique_search_url_section``.

    Returns:
        The url produced by ``full_search_url`` for that section.
    """
    return full_search_url(unique_search_url_section(attr_dict))

### Example output:

In [21]:
create_search_url(search_example_attr_dict)

'https://api.ravelry.com/patterns/search.json?weight=fingering%7Clight-fingering%7Csport&pa=lace%7Cstripes%7Cripple%7Cseamed%7Ccrew-neck%7Celbow-sleeve%7Cbottom-up%7Cwritten-pattern%7Cworked-flat%7Cstripes-colorwork&pc=sweater%7Cpullover&fit=female%7Cadult%7Cfitted&sort=recently-popular&view=captioned_thumbs'

# yarn_weights.py

## create_yarn_list
The input is a yarn weight (such as "fingering", "dk", etc.), the output is a list of that yarn weight and the weight that is one lower or one higher.

This function also uses a dictionary I created that is a pickle file ```yarn_id_dict.p```. This dictionary is a dictionary where the keys are the numbers 0 through 11 and the values are the names of the various yarn weights. This is done for ease of calculating the yarn weight that is one lower and one higher of the input yarn weight.

If the input weight is the lowest or the highest one in the dictionary, the list contains only the input weight and its single neighbouring weight.

In [15]:
def create_yarn_list(input_weight):
    """Return a yarn weight together with its neighbouring weights.

    The weights are read from ``yarn_id_dict.p``, a pickled dict whose
    keys are consecutive integers and whose values are weight names.

    Args:
        input_weight: Yarn weight name; it is lower-cased before lookup.

    Returns:
        ``[input_weight, one_lower, one_higher]``, leaving out whichever
        neighbour does not exist in the dictionary.

    Raises:
        ValueError: If the weight is not a value of the dictionary.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load a
    # yarn_id_dict.p file that you created yourself.
    with open("yarn_id_dict.p", "rb") as handle:
        id_dict = pickle.load(handle)
    input_weight = input_weight.lower()
    input_id = None
    for num, name in id_dict.items():
        if name == input_weight:
            input_id = num
    if input_id is None:
        raise ValueError(
            "unknown yarn weight: {!r}".format(input_weight))
    yarn_list = [input_weight]
    # Lower neighbour first, then higher; skip ids past either end.
    for neighbour_id in (input_id - 1, input_id + 1):
        if neighbour_id in id_dict:
            yarn_list.append(id_dict[neighbour_id])
    return yarn_list

### Example output:

In [44]:
create_yarn_list('dk')

['dk', 'sport', 'worsted']

In [45]:
create_yarn_list('thread')

['thread', 'cobweb']

# pattern_attr_funcs.py

## multiple_pattern_request
The input is a list of pattern ids, output is a json file with those patterns. In the event the request throws an error due to a faulty pattern id, the function will return 404.

In [19]:
def multiple_pattern_request(pattern_list):
    """Request several patterns at once from the Ravelry API.

    Args:
        pattern_list: Iterable of pattern ids; each is converted with
            ``str`` and the ids are joined with ``+`` in the url.

    Returns:
        The decoded JSON body when the response status is 200, otherwise
        the integer 404 as a failure marker.
    """
    ids = '+'.join(str(code) for code in pattern_list)
    patterns_url = 'https://api.ravelry.com/patterns.json?ids={}'.format(ids)
    patterns = requests.get(patterns_url,
                            auth=(personal_keys.username(), personal_keys.password()))
    # Compare by value; `is 200` tests object identity, which only happens
    # to work for ints the interpreter caches.
    if patterns.status_code == 200:
        return patterns.json()
    return 404

### Example output:

In [50]:
example_pattern_list = [819716,
 1024446,
 548384,
 1024387,
 1004479,
 713248,
 169260,
 942286,
 273024,
 1023884]

multiple_pattern_request(example_pattern_list)['patterns'].keys()

dict_keys(['169260', '273024', '548384', '713248', '819716', '942286', '1004479', '1023884', '1024387', '1024446'])

## single_pattern_request
This is the same as multiple_pattern_request except there is only one pattern being requested instead of a list of patterns.

In [24]:
def single_pattern_request(code):
    """Request a single pattern from the Ravelry API.

    Args:
        code: Pattern id or permalink; anything that is not exactly a
            ``str`` is converted with ``str`` first.

    Returns:
        The ``'pattern'`` entry of the decoded JSON response.
    """
    code_text = code if type(code) is str else str(code)
    endpoint = 'https://api.ravelry.com/patterns/{}.json'.format(code_text)
    credentials = (personal_keys.username(), personal_keys.password())
    response = requests.get(endpoint, auth=credentials)
    return response.json()['pattern']

### Example output:

In [53]:
single_pattern_request(1023884).keys()

dict_keys(['comments_count', 'created_at', 'currency', 'difficulty_average', 'difficulty_count', 'downloadable', 'favorites_count', 'free', 'gauge', 'gauge_divisor', 'gauge_pattern', 'generally_available', 'id', 'name', 'pdf_url', 'permalink', 'price', 'projects_count', 'published', 'queued_projects_count', 'rating_average', 'rating_count', 'row_gauge', 'updated_at', 'url', 'yardage', 'yardage_max', 'personal_attributes', 'sizes_available', 'product_id', 'currency_symbol', 'ravelry_download', 'download_location', 'pdf_in_library', 'volumes_in_library', 'gauge_description', 'yarn_weight_description', 'yardage_description', 'pattern_needle_sizes', 'notes_html', 'notes', 'packs', 'printings', 'yarn_weight', 'craft', 'pattern_categories', 'pattern_attributes', 'pattern_author', 'photos', 'pattern_type'])

## url_to_code
This function returns the last part of a given pattern url. This last part can be used to look up the pattern.


In [22]:
def url_to_code(url):
    """Return the part of a pattern url that follows the library prefix.

    Args:
        url: Pattern url string.

    Returns:
        The text after the last occurrence of
        ``https://www.ravelry.com/patterns/library/``, or the whole string
        when the prefix does not occur.
    """
    prefix = 'https://www.ravelry.com/patterns/library/'
    return url.rpartition(prefix)[2]

### Example output:

In [23]:
example_url = 'https://www.ravelry.com/patterns/library/oreti'
url_to_code(example_url)

'oreti'

## url_to_request
This is just a combination of url to code and single pattern request, the input is a url, and output is the pattern json

In [25]:
def url_to_request(url):
    """Look up the pattern that a pattern url points to.

    Args:
        url: Pattern url string handed to ``url_to_code``.

    Returns:
        The pattern JSON returned by ``single_pattern_request``.
    """
    return single_pattern_request(url_to_code(url))

### Example output:

In [27]:
url_to_request(example_url).keys()

dict_keys(['comments_count', 'created_at', 'currency', 'difficulty_average', 'difficulty_count', 'downloadable', 'favorites_count', 'free', 'gauge', 'gauge_divisor', 'gauge_pattern', 'generally_available', 'id', 'name', 'pdf_url', 'permalink', 'price', 'projects_count', 'published', 'queued_projects_count', 'rating_average', 'rating_count', 'row_gauge', 'updated_at', 'url', 'yardage', 'yardage_max', 'personal_attributes', 'sizes_available', 'product_id', 'currency_symbol', 'ravelry_download', 'download_location', 'pdf_in_library', 'volumes_in_library', 'gauge_description', 'yarn_weight_description', 'yardage_description', 'pattern_needle_sizes', 'notes_html', 'notes', 'packs', 'printings', 'yarn_weight', 'craft', 'pattern_categories', 'pattern_attributes', 'pattern_author', 'photos', 'pattern_type'])

## attrs_single_pattern
Input is the output of single_pattern_request, the output is a dictionary containing the pattern's yarn weight, pattern categories, and pattern attributes

In [21]:
def attrs_single_pattern(pattern):
    """Summarise a pattern's yarn weight, attributes and categories.

    Args:
        pattern: Pattern dict with ``'pattern_categories'`` (a list whose
            first element is a nested category dict), ``'yarn_weight'``
            (a dict with a ``'name'``) and ``'pattern_attributes'`` (a
            list of dicts with a ``'permalink'``).

    Returns:
        Dict with ``'yarn_weight'`` (the name with spaces replaced by
        dashes), ``'pattern_attributes'`` (list of permalinks) and
        ``'pattern_categories'`` (category permalinks, with the value
        ``'categories'`` left out).
    """
    # pandas.io.json.json_normalize is deprecated since pandas 1.0 in
    # favour of pandas.json_normalize; fall back to it on older pandas.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize
    flat = normalize(pattern['pattern_categories'][0])
    permalinks = flat.filter(regex='permalink$', axis=1).to_dict(orient='records')[0]
    # Sort by flattened column name so the order does not depend on how
    # the installed pandas orders columns. NOTE(review): this puts the
    # most deeply nested permalink first, assuming the nesting key sorts
    # before 'permalink' (e.g. 'parent') -- confirm against the API payload.
    cat_list = [permalinks[column] for column in sorted(permalinks)
                if permalinks[column] != 'categories']
    return {
        'yarn_weight': '-'.join(pattern['yarn_weight']['name'].split(' ')),
        'pattern_attributes': [attr['permalink']
                               for attr in pattern['pattern_attributes']],
        'pattern_categories': cat_list,
    }

### Example output:

In [55]:
pattern = single_pattern_request(1023884)
attrs_single_pattern(pattern)

{'yarn_weight': 'Aran',
 'pattern_attributes': ['phototutorial',
  'written-pattern',
  'stripes-colorwork'],
 'pattern_categories': ['accessories', 'neck-torso', 'cowl']}

## single_request_to_attrs
This is just a combination of the two functions listed above. The input is a pattern code, output is the pattern's yarn weight, pattern attributes, and pattern categories.

In [22]:
def single_request_to_attrs(code):
    """Fetch one pattern and summarise its attributes.

    Args:
        code: Pattern code handed to ``single_pattern_request``.

    Returns:
        The dict produced by ``attrs_single_pattern`` for that pattern.
    """
    return attrs_single_pattern(single_pattern_request(code))

### Example output:

In [57]:
single_request_to_attrs(1023884)

{'yarn_weight': 'Aran',
 'pattern_attributes': ['phototutorial',
  'written-pattern',
  'stripes-colorwork'],
 'pattern_categories': ['accessories', 'neck-torso', 'cowl']}

## pattern_req
This makes the same request as multiple_pattern_request, except it returns the value of the patterns section of the mpr json.

In [23]:
def pattern_req(pattern_list):
    """Request several patterns and return only the ``'patterns'`` entry.

    Args:
        pattern_list: Pattern ids handed to ``multiple_pattern_request``.

    Returns:
        The value stored under ``'patterns'`` in the response. Note that
        ``multiple_pattern_request`` returns the integer 404 on a failed
        request, in which case the subscript here raises ``TypeError``.
    """
    response_json = multiple_pattern_request(pattern_list)
    return response_json['patterns']

### Example output:

In [61]:
pattern_req(example_pattern_list).keys()

dict_keys(['169260', '273024', '548384', '713248', '819716', '942286', '1004479', '1023884', '1024387', '1024446'])

## attr_dict
Input is a pattern list, output is a dictionary of pattern codes and attributes associated with each pattern

In [24]:
def attr_dict(pattern_list):
    """Map each requested pattern to its attribute flags.

    Args:
        pattern_list: Pattern ids handed to ``pattern_req``.

    Returns:
        Dict of pattern key -> dict with one ``pa_<permalink>`` key, set
        to 1, for every entry in the pattern's ``'pattern_attributes'``.
    """
    patterns = pattern_req(pattern_list)
    return {
        code: {"pa_{}".format(attribute['permalink']): 1
               for attribute in details['pattern_attributes']}
        for code, details in patterns.items()
    }

### Example output:

In [65]:
attr_dict(example_pattern_list)['169260']

{'pa_female': 1,
 'pa_adult': 1,
 'pa_textured': 1,
 'pa_one-piece': 1,
 'pa_seamless': 1,
 'pa_written-pattern': 1,
 'pa_in-the-round': 1,
 'pa_slipped-stitches': 1}

## yarn_dict

Input is a pattern list, output is a dictionary of pattern codes and the yarn weight associated.

The yarn weight is given as ```yarn_id_[what you would put in for url]``` 

For example, for a pattern whose yarn weight is dk, the output would be ```yarn_id_dk``` and a pattern whose yarn id is light fingering, the output would be ```yarn_id_light-fingering```

note: since the name contains spaces, the name is split and put back together with dash where the space was

If a pattern has no yarn weight listed, it is assigned the placeholder yarn weight "yarn_id_None".


In [25]:
def yarn_dict(pattern_list):
    """Map each requested pattern to its yarn weight flag.

    Args:
        pattern_list: Pattern ids handed to ``pattern_req``.

    Returns:
        Dict of pattern key -> single-entry dict. The entry's key is
        ``yarn_id_<name>`` with spaces in the name replaced by dashes, or
        ``yarn_id_None`` when the pattern has no ``'yarn_weight'`` key;
        its value is 1.
    """
    weights = {}
    for code, details in pattern_req(pattern_list).items():
        if 'yarn_weight' not in details:
            weights[code] = {"yarn_id_None": 1}
            continue
        hyphenated = '-'.join(details['yarn_weight']['name'].split(' '))
        weights[code] = {"yarn_id_{}".format(hyphenated): 1}
    return weights

### Example output:

In [67]:
yarn_dict(example_pattern_list)

{'169260': {'yarn_id_DK': 1},
 '273024': {'yarn_id_Bulky': 1},
 '548384': {'yarn_id_Fingering': 1},
 '713248': {'yarn_id_Fingering': 1},
 '819716': {'yarn_id_Sport': 1},
 '942286': {'yarn_id_Worsted': 1},
 '1004479': {'yarn_id_Worsted': 1},
 '1023884': {'yarn_id_Aran': 1},
 '1024387': {'yarn_id_Sport': 1},
 '1024446': {'yarn_id_Bulky': 1}}

## categ_dict
Input is a pattern list, the output is dictionary with the pattern codes and the different categories each pattern is in.

In [26]:
def categ_dict(pattern_list):
    """Map each requested pattern to its category flags.

    Args:
        pattern_list: Pattern ids handed to ``pattern_req``.

    Returns:
        Dict of pattern key -> dict with one ``pc_<permalink>`` key, set
        to 1, for every category permalink found in the first element of
        the pattern's ``'pattern_categories'``. The permalink value
        ``'categories'`` is left out.
    """
    # pandas.io.json.json_normalize is deprecated since pandas 1.0 in
    # favour of pandas.json_normalize; fall back to it on older pandas.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize
    patterns = pattern_req(pattern_list)
    categ_dict = {}
    for key, details in patterns.items():
        flat = normalize(details['pattern_categories'][0])
        permalinks = flat.filter(regex='permalink$', axis=1).to_dict(orient='records')[0]
        # Iterate in sorted column order so the key order of the result
        # does not depend on how the installed pandas orders columns.
        categ_dict[key] = {"pc_{}".format(permalinks[column]): 1
                           for column in sorted(permalinks)
                           if permalinks[column] != 'categories'}
    return categ_dict

### Example output:

In [70]:
categ_dict(example_pattern_list)

{'169260': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '273024': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '548384': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '713248': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '819716': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '942286': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '1004479': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '1023884': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '1024387': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1},
 '1024446': {'pc_accessories': 1, 'pc_neck-torso': 1, 'pc_cowl': 1}}

## all_attr_dict
Input is a pattern list, it creates dictionary of the pattern and the pattern attributes, categories, and yarn weight all in one dict

Essentially, it combines three previous functions plus compressing all three dictionaries into one

Additionally, it norms each value for each pattern entry so that patterns with a lot of attributes have each attribute weighted less than those with fewer attributes 


In [27]:
def all_attr_dict(pattern_list):
    """Combine attribute, yarn weight and category flags per pattern.

    For every pattern the three flag dicts are merged into one, and each
    flag is given the value ``1 / sqrt(number of flags)``, so patterns
    with many flags weight each one less.

    Args:
        pattern_list: Pattern ids handed to ``attr_dict``, ``yarn_dict``
            and ``categ_dict``.

    Returns:
        Dict of pattern key -> dict of flag name -> normed value, with
        one entry for every key of the yarn weight dict.
    """
    attributes = attr_dict(pattern_list)
    weights = yarn_dict(pattern_list)
    categories = categ_dict(pattern_list)
    normed = {}
    for code in weights:
        # Merge in the order attributes, yarn weight, categories.
        merged = attributes[code]
        merged.update(weights[code])
        merged.update(categories[code])
        normed[code] = {flag: 1/math.sqrt(len(merged)) for flag in merged}
    return normed

### Example output:

In [74]:
all_attr_dict(example_pattern_list)

{'169260': {'pa_female': 0.2886751345948129,
  'pa_adult': 0.2886751345948129,
  'pa_textured': 0.2886751345948129,
  'pa_one-piece': 0.2886751345948129,
  'pa_seamless': 0.2886751345948129,
  'pa_written-pattern': 0.2886751345948129,
  'pa_in-the-round': 0.2886751345948129,
  'pa_slipped-stitches': 0.2886751345948129,
  'yarn_id_DK': 0.2886751345948129,
  'pc_accessories': 0.2886751345948129,
  'pc_neck-torso': 0.2886751345948129,
  'pc_cowl': 0.2886751345948129},
 '273024': {'pa_in-the-round': 0.35355339059327373,
  'pa_unisex': 0.35355339059327373,
  'pa_short-rows': 0.35355339059327373,
  'pa_written-pattern': 0.35355339059327373,
  'yarn_id_Bulky': 0.35355339059327373,
  'pc_accessories': 0.35355339059327373,
  'pc_neck-torso': 0.35355339059327373,
  'pc_cowl': 0.35355339059327373},
 '548384': {'pa_male': 0.24253562503633297,
  'pa_female': 0.24253562503633297,
  'pa_unisex': 0.24253562503633297,
  'pa_teen': 0.24253562503633297,
  'pa_adult': 0.24253562503633297,
  'pa_lace': 0.2

# count_df_funcs.py


## pattern_attr_to_df
Input is dictionary created in all_attr_dict. It creates a dataframe whose index is pattern code, columns are various attributes, categories, and yarn weights.


In [28]:
def pattern_attr_to_df(pattern_dict):
    """Turn a per-pattern flag dictionary into a DataFrame.

    Args:
        pattern_dict: Dict of pattern key -> dict of flag name -> value.

    Returns:
        DataFrame with one row per pattern key (used as the index) and
        one column per flag name; missing flags are filled with 0.
    """
    codes = list(pattern_dict.keys())
    rows = list(pattern_dict.values())
    return pd.DataFrame(rows, index=codes).fillna(0)

### Example output:

In [80]:
example_attr_dict = all_attr_dict(example_pattern_list)
pattern_attr_to_df(example_attr_dict)

Unnamed: 0,pa_adult,pa_asymmetric,pa_bias,pa_bottom-up,pa_chart,pa_dropped-stitches,pa_female,pa_icord-edging,pa_in-the-round,pa_kitchener,...,pa_written-pattern,pc_accessories,pc_cowl,pc_neck-torso,yarn_id_Aran,yarn_id_Bulky,yarn_id_DK,yarn_id_Fingering,yarn_id_Sport,yarn_id_Worsted
169260,0.288675,0.0,0.0,0.0,0.0,0.0,0.288675,0.0,0.288675,0.0,...,0.288675,0.288675,0.288675,0.288675,0.0,0.0,0.288675,0.0,0.0,0.0
273024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.353553,0.0,...,0.353553,0.353553,0.353553,0.353553,0.0,0.353553,0.0,0.0,0.0,0.0
548384,0.242536,0.0,0.0,0.242536,0.0,0.0,0.242536,0.0,0.242536,0.0,...,0.242536,0.242536,0.242536,0.242536,0.0,0.0,0.0,0.242536,0.0,0.0
713248,0.301511,0.0,0.0,0.301511,0.0,0.0,0.0,0.0,0.301511,0.0,...,0.301511,0.301511,0.301511,0.301511,0.0,0.0,0.0,0.301511,0.0,0.0
819716,0.0,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0,0.0,...,0.235702,0.235702,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0
942286,0.0,0.0,0.0,0.0,0.258199,0.258199,0.258199,0.0,0.0,0.258199,...,0.258199,0.258199,0.258199,0.258199,0.0,0.0,0.0,0.0,0.0,0.258199
1004479,0.0,0.0,0.0,0.0,0.258199,0.0,0.0,0.0,0.258199,0.258199,...,0.258199,0.258199,0.258199,0.258199,0.0,0.0,0.0,0.0,0.0,0.258199
1023884,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.377964,0.377964,0.377964,0.377964,0.377964,0.0,0.0,0.0,0.0,0.0
1024387,0.235702,0.0,0.235702,0.0,0.235702,0.0,0.0,0.0,0.0,0.0,...,0.235702,0.235702,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0
1024446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.377964,0.377964,0.377964,0.377964,0.0,0.377964,0.0,0.0,0.0,0.0


## pattern_list_to_df
Input is a pattern list, output is a dataframe of all the patterns and their attributes. Essentially a combo of all_attr_dict and pattern_attr_to_df

In [29]:
def pattern_list_to_df(pattern_list):
    """Build the attribute DataFrame for a list of patterns.

    Args:
        pattern_list: Pattern ids handed to ``all_attr_dict``.

    Returns:
        The DataFrame produced by ``pattern_attr_to_df`` from the normed
        flag dictionary.
    """
    return pattern_attr_to_df(all_attr_dict(pattern_list))

### Example output:

In [81]:
pattern_list_to_df(example_pattern_list)

Unnamed: 0,pa_adult,pa_asymmetric,pa_bias,pa_bottom-up,pa_chart,pa_dropped-stitches,pa_female,pa_icord-edging,pa_in-the-round,pa_kitchener,...,pa_written-pattern,pc_accessories,pc_cowl,pc_neck-torso,yarn_id_Aran,yarn_id_Bulky,yarn_id_DK,yarn_id_Fingering,yarn_id_Sport,yarn_id_Worsted
169260,0.288675,0.0,0.0,0.0,0.0,0.0,0.288675,0.0,0.288675,0.0,...,0.288675,0.288675,0.288675,0.288675,0.0,0.0,0.288675,0.0,0.0,0.0
273024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.353553,0.0,...,0.353553,0.353553,0.353553,0.353553,0.0,0.353553,0.0,0.0,0.0,0.0
548384,0.242536,0.0,0.0,0.242536,0.0,0.0,0.242536,0.0,0.242536,0.0,...,0.242536,0.242536,0.242536,0.242536,0.0,0.0,0.0,0.242536,0.0,0.0
713248,0.301511,0.0,0.0,0.301511,0.0,0.0,0.0,0.0,0.301511,0.0,...,0.301511,0.301511,0.301511,0.301511,0.0,0.0,0.0,0.301511,0.0,0.0
819716,0.0,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0,0.0,...,0.235702,0.235702,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0
942286,0.0,0.0,0.0,0.0,0.258199,0.258199,0.258199,0.0,0.0,0.258199,...,0.258199,0.258199,0.258199,0.258199,0.0,0.0,0.0,0.0,0.0,0.258199
1004479,0.0,0.0,0.0,0.0,0.258199,0.0,0.0,0.0,0.258199,0.258199,...,0.258199,0.258199,0.258199,0.258199,0.0,0.0,0.0,0.0,0.0,0.258199
1023884,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.377964,0.377964,0.377964,0.377964,0.377964,0.0,0.0,0.0,0.0,0.0
1024387,0.235702,0.0,0.235702,0.0,0.235702,0.0,0.0,0.0,0.0,0.0,...,0.235702,0.235702,0.235702,0.235702,0.0,0.0,0.0,0.0,0.235702,0.0
1024446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.377964,0.377964,0.377964,0.377964,0.0,0.377964,0.0,0.0,0.0,0.0


## user_profile_df

Input is a username, output is a df of all the user's favs and projects in one dataframe. 

Dataframe columns include pattern categories (prefaced with "pc_"), pattern attributes (prefaced with "pa_"), and yarn weight (prefaced with "yarn_id_"). 

Output also includes a final column that is user data. Each code either gets a 1 if it's in favs or a 3 or user's rating if it's in their projects


In [37]:
def user_profile_df(username):
    """Build a DataFrame of a user's favorites and projects.

    Args:
        username: Ravelry username.

    Returns:
        The attribute DataFrame of every distinct favorite or project
        pattern, with an added ``'user_data'`` column taken from
        ``user_data(username)`` and aligned on the DataFrame index.
    """
    fav_list = get_favs_list(username)
    proj_list = get_project_list_from_username(username)
    # Project lists can contain None (get_friend_projs filters it out for
    # that reason); drop it so it is not sent as a pattern id.
    full_list = [code for code in set(fav_list + proj_list) if code is not None]
    df = pattern_list_to_df(full_list)
    df['user_data'] = pd.Series(user_data(username))
    return df

### Example output:

In [38]:
user_profile_df("katec125").head(10)

Unnamed: 0,pa_3-4-sleeve,pa_3-dimensional,pa_Intarsia,pa_Shetland,pa_adult,pa_afterthought-pocket,pa_aline,pa_amigurumi,pa_appliqued,pa_asymmetric,...,yarn_id_Bulky,yarn_id_DK,yarn_id_Fingering,yarn_id_Lace,yarn_id_Light-Fingering,yarn_id_None,yarn_id_Sport,yarn_id_Super-Bulky,yarn_id_Worsted,user_data
36436,0.0,0.0,0.0,0.0,0.242536,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.242536,0.0,0.0,0.0,0.0,0.0,0.0,1
57264,0.0,0.0,0.0,0.0,0.229416,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.229416,0.0,0.0,1
78651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,0.0,1
91776,0.0,0.0,0.0,0.0,0.316228,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.316228,0.0,0.0,0.0,0.0,0.0,3
103767,0.0,0.0,0.0,0.0,0.208514,0.0,0.0,0.0,0.0,0.0,...,0.208514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
105865,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,1
118820,0.0,0.0,0.0,0.0,0.301511,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.301511,0.0,0.0,0.0,0.0,0.0,0.0,3
121493,0.0,0.0,0.0,0.0,0.235702,0.0,0.0,0.0,0.0,0.0,...,0.235702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
132791,0.0,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,0.0,1
171113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,1


## user_profile_dict
This function takes in the previously made dataframe, and then takes the dot product of each column and the user data column. Each product then gets added to a dictionary where the key is a given attribute and the value is the dot product.

In [39]:
def user_profile_dict(user_data_df):
    """Collapse a user profile dataframe into one score per attribute.

    Every column other than `user_data` is treated as an attribute column and
    is dotted with the `user_data` column.

    Parameters
    ----------
    user_data_df : pandas.DataFrame
        Dataframe with one column per attribute plus a `user_data` column.
        The `user_data` column may sit at any position.

    Returns
    -------
    dict
        Maps each attribute column name to the dot product of that column
        with the `user_data` column.

    Raises
    ------
    KeyError
        If `user_data_df` has no `user_data` column.
    """
    # Look the weight column up once rather than once per attribute.
    weights = user_data_df['user_data']
    # Select attributes by name: dropping "the last column" by position would
    # silently mis-score the frame whenever `user_data` is not the final column.
    attribute_cols = [col for col in user_data_df.columns if col != 'user_data']
    return {col: np.dot(user_data_df[col], weights) for col in attribute_cols}

### Example output:

In [41]:
# Build the example user's profile dataframe, then reduce it to the
# attribute -> dot-product dictionary.
user_data_df = user_profile_df("katec125")
user_profile_dict(user_data_df)

{'pa_3-4-sleeve': 1.5369028913029648,
 'pa_3-dimensional': 0.7862127634730902,
 'pa_Intarsia': 1.8912033012166127,
 'pa_Shetland': 0.7282378854318489,
 'pa_adult': 30.239751759389442,
 'pa_afterthought-pocket': 0.23570226039551587,
 'pa_aline': 1.7351376052018876,
 'pa_amigurumi': 0.3779644730092272,
 'pa_appliqued': 0.6255432421712244,
 'pa_asymmetric': 1.5612385803482762,
 'pa_baby': 0.3333333333333333,
 'pa_ballet-neck': 0.862955612973055,
 'pa_beads': 1.3363062095621219,
 'pa_bias': 0.6976336241065542,
 'pa_boat-neck': 1.8410759723558563,
 'pa_bobble-or-popcorn': 0.2672612419124244,
 'pa_bottom-up': 14.068443875731088,
 'pa_bracelet-sleeve': 0.8729158626938447,
 'pa_brioche-tuck': 3.337717184833733,
 'pa_buttoned': 0.4661424227863119,
 'pa_buttonholes': 0.22360679774997896,
 'pa_cables': 3.006497992007887,
 'pa_cap-sleeve': 1.79438020684445,
 'pa_chart': 23.54558881330513,
 'pa_child': 2.473742042999813,
 'pa_circle-shaped': 0.5547001962252291,
 'pa_circular-yoke': 9.39919857721550

## user_profile

This function simply chains the previous two functions. The input is a username, and the output is a user profile in the form of a dictionary whose keys are pattern attributes and whose values are the dot products between the user data column and each attribute column.


In [43]:
def user_profile(username):
    """Return the attribute -> score profile dictionary for a username.

    Builds the user's profile dataframe with `user_profile_df` and passes it
    straight to `user_profile_dict`.
    """
    return user_profile_dict(user_profile_df(username))

### Example output:

In [44]:
# Build the profile dictionary for an example user directly from the username.
user_profile("katec125")

{'pa_3-4-sleeve': 1.5369028913029648,
 'pa_3-dimensional': 0.7862127634730902,
 'pa_Intarsia': 1.8912033012166127,
 'pa_Shetland': 0.7282378854318489,
 'pa_adult': 30.239751759389442,
 'pa_afterthought-pocket': 0.23570226039551587,
 'pa_aline': 1.7351376052018876,
 'pa_amigurumi': 0.3779644730092272,
 'pa_appliqued': 0.6255432421712244,
 'pa_asymmetric': 1.5612385803482762,
 'pa_baby': 0.3333333333333333,
 'pa_ballet-neck': 0.862955612973055,
 'pa_beads': 1.3363062095621219,
 'pa_bias': 0.6976336241065542,
 'pa_boat-neck': 1.8410759723558563,
 'pa_bobble-or-popcorn': 0.2672612419124244,
 'pa_bottom-up': 14.068443875731088,
 'pa_bracelet-sleeve': 0.8729158626938447,
 'pa_brioche-tuck': 3.337717184833733,
 'pa_buttoned': 0.4661424227863119,
 'pa_buttonholes': 0.22360679774997896,
 'pa_cables': 3.006497992007887,
 'pa_cap-sleeve': 1.79438020684445,
 'pa_chart': 23.54558881330513,
 'pa_child': 2.473742042999813,
 'pa_circle-shaped': 0.5547001962252291,
 'pa_circular-yoke': 9.39919857721550

*Note: the two functions listed below may end up not being used*

## create_fav_df
Input is a username; output is a dataframe of the patterns in their favorites (at most the first 500, because the list comes from `get_favs_list`).

In [78]:
def create_fav_df(username):
    """Return the pattern dataframe for a user's favorites.

    The favorites list from `get_favs_list(username)` is converted with
    `pattern_list_to_df` and returned unchanged.
    """
    return pattern_list_to_df(get_favs_list(username))

### Example output:

In [84]:
# Preview the first 10 rows of the favorites-only dataframe for an example user.
create_fav_df("katec125").head(10)

Unnamed: 0,pa_3-4-sleeve,pa_3-dimensional,pa_Intarsia,pa_Shetland,pa_adult,pa_afterthought-pocket,pa_aline,pa_amigurumi,pa_asymmetric,pa_baby,...,yarn_id_Aran,yarn_id_Bulky,yarn_id_DK,yarn_id_Fingering,yarn_id_Lace,yarn_id_Light-Fingering,yarn_id_None,yarn_id_Sport,yarn_id_Super-Bulky,yarn_id_Worsted
36436,0.0,0.0,0.0,0.0,0.242536,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.242536,0.0,0.0,0.0,0.0,0.0,0.0
57264,0.0,0.0,0.0,0.0,0.229416,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.229416,0.0,0.0
78651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,0.0
105865,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
121493,0.0,0.0,0.0,0.0,0.235702,0.0,0.0,0.0,0.0,0.0,...,0.0,0.235702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
132791,0.0,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,0.0
171113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
172113,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
243036,0.0,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.27735,0.0,0.0,0.0,0.0,0.0
291786,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## create_proj_df
Input is a username; output is a dataframe of the user's projects. It is essentially the same as the favorites function, but it works with the user's projects instead of their favorites.

In [79]:
def create_proj_df(username):
    """Return a dataframe built from a user's project list.

    The list from `get_project_list_from_username(username)` is passed to
    `pattern_attr_to_df` and the result is returned unchanged.

    NOTE(review): `create_fav_df` and `user_profile_df` convert the same kind
    of list with `pattern_list_to_df`, whereas this function uses
    `pattern_attr_to_df` -- confirm that helper accepts a project list.
    """
    return pattern_attr_to_df(get_project_list_from_username(username))