In [1]:
import copy
import os
import numpy as np
import matplotlib.pyplot as plt
import scipy

import user_model
import recommendation_model
import article_model

from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Read the article corpus: every ".txt" file in the article directory.
article_directory = "goethe_articles"

# os.listdir() returns entries in arbitrary order; sort them so the list passed
# on below (and anything derived from its order) is the same on every run.
article_names = sorted(
    name for name in os.listdir(article_directory) if name.endswith(".txt")
)

# The result is indexed by article file name further down in the notebook
# (article_dict[name]), and each value is indexed like a feature vector.
article_dict = article_model.create_feature_vector_dict(
    article_directory, article_names, article_model.create_feature_vector)

In [3]:
# Hand-written rating profiles (article file name -> rating) used to try out
# the recommender with different kinds of users.

# Every article rated 1: expect recommendation of only difficult articles.
user_ratings_1 = dict.fromkeys(
    ("A1_01.txt", "A1_02.txt", "B1_01.txt", "B1_02.txt", "C2_01.txt", "C2_02.txt"),
    1,
)

# Every article rated 5: expect recommendation of only basic articles.
# NOTE(review): "C1_01.txt" is the only C1 key in any of these profiles (the
# other profiles use "C2_01.txt") -- confirm this is intentional, not a typo.
user_ratings_2 = dict.fromkeys(
    ("A1_01.txt", "A1_02.txt", "B1_01.txt", "B1_02.txt", "C1_01.txt", "C2_02.txt"),
    5,
)

# A1 articles rated 1, C2 articles rated 5: expect recommendation in between.
user_ratings_3 = dict(
    [(name, 1) for name in ("A1_01.txt", "A1_02.txt", "A1_03.txt")]
    + [(name, 5) for name in ("C2_01.txt", "C2_02.txt", "C2_03.txt")]
)

# Three articles per level, rating rising with the level: A1 -> 1, B1 -> 2, C2 -> 3.
user_ratings_mixed = {
    "{}_{:02d}.txt".format(level, number): rating
    for level, rating in (("A1", 1), ("B1", 2), ("C2", 3))
    for number in (1, 2, 3)
}


In [4]:
# Create the user representation over the article corpus and record the
# "mixed" rating profile for it.
user = user_model.UserRep(article_dict)
user.update_user(user_ratings_mixed)

# Third argument of the kNN classification call below.
# NOTE(review): presumably the neighbour count; an even value can produce ties
# in a majority vote -- confirm how recommendation_model breaks them.
k = 2

# Classify the articles from the ones the user has read; the result is indexed
# by article name when the classification is plotted.
classified = recommendation_model.knn_classification_based_on_read_articles(
    article_dict,
    user.get_read_articles(),
    k,
)

In [5]:
# Scatter plot of every article, coloured by the rating the user gave it;
# articles the user has not rated are drawn in black.
#
# Everything for this figure lives in ONE cell: with the default inline
# backend a figure is rendered and closed when the cell that created it
# finishes, so creating the axes in an earlier cell and drawing on them here
# would leave an empty plot there and no plot here.

# Articles the user has read; looked up by article name below and used as the
# key into the colour map, so the values are the ratings.
read_article_dict = user.get_read_articles()
read_article_names = read_article_dict.keys()
# Feature data of the read articles only. Not used by the plot in this cell;
# kept because other cells may rely on these names.
read_article_fvs = np.array([article_dict[n] for n in read_article_names])
read_article_xs = np.array([fv[1] for fv in read_article_fvs])
read_article_ys = np.array([fv[2] for fv in read_article_fvs])

# Snapshot the names as a list so names, labels, coordinates and colours are
# all built in the same fixed order.
article_names = list(article_dict.keys())
articles = np.array([article_dict[n] for n in article_names])
# Feature components 1 and 2 are plotted on x and y.
# NOTE(review): assumed to be the quantities named in the axis labels -- confirm
# against article_model.create_feature_vector.
xs = np.array([a[1] for a in articles])
ys = np.array([a[2] for a in articles])

# Point label = first two characters of the file name ("A1" for "A1_01.txt").
labels = np.array([name[0:2] for name in article_names])

# Colour per rating. Covers 1-5 because the rating profiles defined in this
# notebook use values up to 5; with only 1-3 mapped, switching to one of those
# profiles raised a KeyError here.
color = {
    "unread": "black",
    1: 'red',
    2: 'blue',
    3: 'green',
    4: 'orange',
    5: 'purple',
}

colors = np.array([color[read_article_dict.get(n, "unread")] for n in article_names])

fig, ax = plt.subplots()
ax.scatter(xs, ys, color=colors)
for label, x, y in zip(labels, xs, ys):
    ax.annotate(label, xy=(x, y))

ax.set_title("Articles coloured by user rating")
ax.set_xlabel("average sentence length")
ax.set_ylabel("average word length")

plt.show()

In [7]:
# Same scatter plot as before, but each article is coloured by the class stored
# for it in `classified` (the result of the kNN classification call).
fig, ax = plt.subplots()

colors = np.array([color[classified[name]] for name in article_names])
ax.scatter(xs, ys, color=colors)

# Annotate every point with its two-character label.
for label, x, y in zip(labels, xs, ys):
    ax.annotate(label, xy=(x, y))

ax.set(xlabel="average sentence length", ylabel="average word length")

plt.show()