# SocialVec Examples

## Import and initializations

The cell below is only needed to import a local copy of the package that was not installed using pip.

In [None]:
import os
import sys
from pathlib import Path
# The local package is looked for at <parent of the working directory>/socialvec;
# the directory containing it is appended to the import search path.
package_dir = str(Path.cwd().parent.absolute() / 'socialvec')
sys.path.append(str(Path(package_dir).parent))

If you installed the package using pip, you can simply import it as shown below.

In [None]:
# Installs fastparquet into the running kernel's environment.
# NOTE(review): --proxy points at a hard-coded intel.com proxy host; remove or
# replace the flag when installing from a different network.
%pip install --proxy=http://proxy-chain.intel.com:911 fastparquet

In [None]:
from socialvec.socialvec import SocialVec
# Construct with default arguments; the classification section later in this
# notebook passes model_name="2020c" instead.
sv = SocialVec()
#from socialvec.socialvec import SocialVecClassifier

## Basic Usage Examples

### Get a user's vector by Twitter ID (string or integer) or by username

In [None]:
# Look up the embedding vector by Twitter ID given as an integer.
sv[12]

In [None]:
# The Twitter ID may also be given as a string.
sv["12"]

In [None]:
# Look up the embedding vector by username.
sv["jack"]

### Get similar users

In [None]:
# Users similar to the account identified by this username.
sv.get_similar('jack')

### Get the average embeddings of multiple users
To get the embedding of a user who is not a popular entity, collect the list of accounts that this user follows and pass it to the `get_average_embeddings` function. The function returns the embedding vector for this user.

**This function currently only supports a list of user IDs.**

In [None]:
# Resolve a username to its user ID (get_average_embeddings takes IDs).
sv.get_userid('madonna')

In [None]:
# get_average_embeddings takes user IDs, so resolve each username first.
followed_ids = [sv.get_userid(username)
                for username in ('rihanna', 'arianagrande', 'madonna')]
v = sv.get_average_embeddings(followed_ids)

# Element 0 of the result is the vector passed on to get_similar.
sv.get_similar(v[0])

## Get similar to multiple users
The `get_similar` function also accepts a list of Twitter IDs, in which case it returns the users most similar to the average of these users.

In [None]:
# Two groups of usernames (education-related and sports-related accounts).
# Each group is resolved to user IDs, since the following cells pass ID lists
# to get_similar. The loop variable is named `username` rather than `id` so
# that the builtin id() is not shadowed.
edu = ['Harvard', 'MIT', 'UCLA']
edu_ids = [sv.get_userid(username) for username in edu]

sports = ['FCBarcelona', 'ManUtd', 'realmadrid']
sports_ids = [sv.get_userid(username) for username in sports]

In [None]:
# Pass the whole ID list and keep only the first three entries of the result.
sv.get_similar(edu_ids).head(3)

In [None]:
# Same query for the sports accounts; again only the first three entries.
sv.get_similar(sports_ids).head(3)

## Get similarity

In [None]:
# Similarity between two accounts, both given by username.
sv.get_similarity('barackobama', 'realdonaldtrump')

### Get similarity for a vector

In [None]:
# The first argument can also be an embedding vector (here the vector for ID 12).
sv.get_similarity(sv[12], 'realdonaldtrump')

## Arithmetic fun

In [None]:
# Term lists for a word-embedding analogy query, in the positive/negative
# form that most_similar() takes further down in this section.
# The original one-line form `positive=[...], negative=[...]` is only valid
# inside a call; as a statement it is a SyntaxError, so the two lists are
# assigned separately here.
positive = ['woman', 'king']
negative = ['man']

In [None]:
# Resolve three usernames to user IDs in a single unpacking assignment.
ida, idb, idc = (
    sv.get_userid(username)
    for username in ('BarackObama', 'BillClinton', 'hillaryclinton')
)

In [None]:
# Analogy query on the wrapped model reached through sv.sv.wv:
# two accounts contribute positively, one negatively.
analogy_hits = sv.sv.wv.most_similar(
    positive=[sv.get_userid('BarackObama'), sv.get_userid('michelleobama')],
    negative=[sv.get_userid('JoeBiden')],
    topn=1,
)

# First element of the single hit is the key that is mapped to a screen name.
top_user_id = analogy_hits[0][0]
sv.get_screen_name(top_user_id)

In [None]:
# Arithmetic directly on embedding vectors: subtract one account's vector,
# add another's, and query for users similar to the resulting vector.
sv.get_similar(sv['michelleobama'] - sv['POTUS44'] + sv['HillaryClinton'])

# Classification Examples

# Get the embeddings of any user who is not popular

In [None]:
import toml
import tweepy

In [None]:
# Read Twitter API settings from tweepy.toml in the working directory.
# The file needs a [credentials] table; the keys read from it later are
# consumer_key, consumer_secret, access_token and access_token_secret.
tweepy_config = toml.load("tweepy.toml")
tweepy_credentials = tweepy_config['credentials']

In [None]:
# Authenticate with the consumer key/secret, then attach the user access token.
auth = tweepy.OAuthHandler(tweepy_credentials['consumer_key'], tweepy_credentials['consumer_secret'])
auth.set_access_token(tweepy_credentials['access_token'], tweepy_credentials['access_token_secret'])
# The proxy argument is optional. NOTE(review): the value here is a hard-coded
# intel.com proxy host; drop or replace it when running on another network.
api = tweepy.API(auth, proxy="http://proxy-chain.intel.com:911")

In [None]:
# IDs of the accounts this user follows, fetched through the Twitter API.
friends = api.get_friend_ids(screen_name="nirlotan")

In [None]:
# Average the embeddings of the followed accounts; element 0 of the result
# is kept as this user's vector.
nirlotan_embeddings = sv.get_average_embeddings(friends)[0]

In [None]:
# Users similar to the averaged vector computed from the followed accounts.
sv.get_similar(nirlotan_embeddings)

# Init Classifier

Currently classification only works with model version 2020c

In [None]:
from socialvec.socialvec import SocialVec
# Rebind sv to an instance built with the 2020c model, the version the note
# above says classification currently requires.
sv = SocialVec(model_name="2020c")


In [None]:
# Set up classification on this instance.
# NOTE(review): presumably this is what populates sv.classifier, which the
# following cells use - confirm against the SocialVec implementation.
sv.init_classifier()

In [None]:
# Run the "political" classifier on an account's embedding vector.
sv.classifier.predict("political",sv['barackobama'])

In [None]:
# Same classifier applied to a different account's embedding vector.
sv.classifier.predict("political",sv['NASA'])

# Test Classifiers

In [None]:
# Repeats the 2020c model and classifier setup from the previous section.
# NOTE(review): presumably duplicated so this section can be run on its own.
from socialvec.socialvec import SocialVec
sv = SocialVec(model_name="2020c")
sv.init_classifier()

In [None]:
def check_classifier(single_user, expected_class, classifier_name):
    """Classify one account and print whether the result matches expectations.

    Uses the notebook-level ``sv`` instance, whose classifier must already
    be initialised.

    Args:
        single_user: Username of the account to classify.
        expected_class: Class label the prediction is compared against.
        classifier_name: Name of the classifier passed to
            ``sv.classifier.predict``.

    Returns:
        None. The outcome is reported with ``print`` only.
    """
    # The account's vector is obtained by averaging over a one-element ID list.
    user_id = sv.get_userid(single_user)
    embedding = sv.get_average_embeddings([user_id])[0]

    outcome = sv.classifier.predict(classifier_name, embedding)
    # Index 0 is the predicted class, index 1 the value reported as confidence.
    predicted, confidence = outcome[0], outcome[1]

    if predicted != expected_class:
        print(f"FAILED! Expected: {expected_class}, Got: {predicted}, confidence: {confidence}")
        return
    print(f"TEST PASSED. Expected: {expected_class}, Got: {predicted}, confidence: {confidence}")

In [None]:
# One row per check: (username, expected class, classifier name).
CLASSIFIER_CASES = [
    ("BarackObama", "Democrat", "political"),
    ("ChickfilA", "Republican", "political"),
    ("RapSheet", "Male", "gender"),
    ("ChelseaHouska", "Female", "gender"),
    ("RASHEEDA", "AfrAmerican", "ethnicity"),
    ("Country_Words", "Caucasian", "ethnicity"),
    ("NASA", "Degree", "education"),
    ("illuminatihotts", "HighSchool", "education"),
    ("ToysRUs", "Yes", "children"),
    ("donaldglover", "No", "children"),
]

# Run the checks in the listed order; each prints its own pass/fail line.
for case_user, case_expected, case_classifier in CLASSIFIER_CASES:
    check_classifier(case_user, case_expected, case_classifier)