@@ -10,9 +10,10 @@ def __init__(self):
self.database()

def triples(self):
""" Generates triples from the given data string. So if our string were
"What a lovely day", we'd generate (What, a, lovely) and then
(a, lovely, day).
"""
Generates triples from the given data string. So if our string were
"What a lovely day", we'd generate (What, a, lovely) and then
(a, lovely, day).
"""

if len(self.words) < 3:
@@ -31,6 +32,8 @@ def database(self):

def generate_markov_twitter_post(self):
self.word_size = len(self.words)
if self.word_size < 4:
return []
size = 10
seed = random.randint(0, self.word_size-3)
seed_word, next_word = self.words[seed], self.words[seed+1]
File renamed without changes.
@@ -0,0 +1,2 @@
from twitter import TwitterPerson
from facebook import FacebookPerson, FacebookPost
@@ -0,0 +1,125 @@
# (at artandlogic.com) models for comment storing

import hashlib
import json
from django.db import models, IntegrityError
from django.core.exceptions import ValidationError
from django.contrib.auth.models import User
import facebook
import requests

from constants import *

#Classes for storing fb data
class FacebookPerson(User, models.Model):
    """A Facebook account, stored as a subclass of Django's ``User``."""

    def __str__(self):
        """Return the account's username."""
        # The bare name ``username`` used here before raised NameError;
        # the value lives on the instance.
        return self.username

class FacebookPost(models.Model):
    """A single Facebook feed entry and the text/link fields it carried."""

    author = models.ForeignKey(FacebookPerson, default=None, null=True)
    index = models.IntegerField(default=0)
    story = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    message = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    picture = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    link = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)

    def __str__(self):
        """Return a multi-line dump with one ``name: value`` line per field."""
        # Every value goes through str() because all of these fields are
        # nullable; ``content`` was previously concatenated raw and raised
        # TypeError when it was None.
        lines = [
            ' author: ' + str(self.author),
            ' index: ' + str(self.index),
            ' story: ' + str(self.story),
            ' message: ' + str(self.message),
            ' picture: ' + str(self.picture),
            ' link: ' + str(self.link),
            ' content: ' + str(self.content),
        ]
        return '\n'.join(lines)

# Functions for scraping and collecting data
def get_clean_fb_feed(self):
    """
    Fetch this user's Facebook home feed and reduce it to plain dicts.

    The access token is read from the user's first ``social_auth`` entry and
    ``/me/home`` is requested through ``facebook.GraphAPI``. Nothing is
    written to the database here.

    Returns a list with one dict per post (keys: 'from', 'name', 'story',
    'message', 'picture', 'link', 'friends', 'comments'), or None when the
    user has no ``social_auth`` entry or the response has no 'data' entry.
    """

    auth = self.social_auth.first()
    if auth is None:
        # No linked social account, so there is no token to query with.
        return None

    graph = facebook.GraphAPI(auth.extra_data['access_token'])
    raw_data = graph.get_object('/me/home')

    data = raw_data.get('data')
    if data is None:
        return None

    return [
        {
            'from': post['from']['name'],
            'name': post.get('name'),
            'story': post.get('story'),
            'message': post.get('message'),
            'picture': post.get('picture'),
            'link': post.get('link'),
            'friends': post.get('friends'),
            'comments': process_comments(post.get('comments')),
        }
        for post in data
    ]
# Attach the function to Django's User model under the name
# 'get_clean_fb_feed' so it can be called as a method on User instances.
User.add_to_class('get_clean_fb_feed', get_clean_fb_feed)


def save_clean_fb_feed(self):
    """
    Fetch the cleaned Facebook feed and persist each post.

    For every post returned by ``self.get_clean_fb_feed()`` the author is
    looked up or created as a ``FacebookPerson`` (keyed by the post's 'from'
    name) and a matching ``FacebookPost`` is looked up or created.

    Returns None. Does nothing beyond the placeholder-user creation below
    when the feed is None or empty.
    """

    clean_data = self.get_clean_fb_feed()

    # NOTE(review): the "SELF" placeholder user is not used by the rest of
    # this function; the call is kept only so its database side effect is
    # unchanged. Confirm whether anything else relies on it.
    User.objects.get_or_create(username="SELF")

    if not clean_data:
        # get_clean_fb_feed() returns None when there is nothing to read.
        return

    for raw_post in clean_data:
        content = {
            'story': raw_post['story'],
            'message': raw_post['message'],
            'picture': raw_post['picture'],
            'link': raw_post['link'],
            # Previously copied from raw_post['story'] by mistake.
            'friends': raw_post['friends'],
        }
        # TODO: loop through raw_post['comments'] as well.

        # FacebookPost.author is a ForeignKey to FacebookPerson, so the
        # author has to be a FacebookPerson rather than a plain User.
        # NOTE(review): if a plain User with this username already exists,
        # creating the FacebookPerson may hit the unique-username constraint.
        author, _ = FacebookPerson.objects.get_or_create(
            username=raw_post['from'])

        # The model field is named ``author``; the old ``poster=`` keyword
        # did not match any field on FacebookPost.
        # NOTE(review): ``content`` is a dict handed to a CharField, so it is
        # stored in its string form; consider an explicit serialization.
        FacebookPost.objects.get_or_create(
            author=author,
            index=0,
            story=content['story'],
            message=content['message'],
            picture=content['picture'],
            link=content['link'],
            content=content,
        )
# Attach the function to Django's User model under the name
# 'save_clean_fb_feed' so it can be called as a method on User instances.
User.add_to_class('save_clean_fb_feed', save_clean_fb_feed)

def process_comments(comments):
    """
    Organizes the comment data into a list of per-comment entries.

    ``comments`` is the raw 'comments' value of a feed post: a dict whose
    'data' entry is a list of comment dicts, or None.

    Returns None when ``comments`` is None or has no 'data' entry. Otherwise
    returns one three-item list per comment:
    ``[{'from': name}, {'message': text}, {'like count': count}]``.
    Any value missing from a comment is reported as None.
    """

    if comments is None:
        return None

    data = comments.get('data')
    if data is None:
        return None

    return [
        [
            # A comment without a 'from' entry yields None here instead of
            # raising KeyError and aborting the whole feed.
            {'from': (comment.get('from') or {}).get('name')},
            {'message': comment.get('message')},
            {'like count': comment.get('like_count')},
        ]
        for comment in data
    ]

@@ -0,0 +1,181 @@
# (at artandlogic.com) models for comment storing

import hashlib
import json
from django.db import models, IntegrityError
from django.core.exceptions import ValidationError
from django.contrib.auth.models import User
import requests
from TweepyScraper import TweepyScraper
from Markov import Markov

from constants import *

# Credentials handed to TweepyScraper(...) by the scraping code in this file.
# NOTE(review): these secrets are committed in plain text, so anyone who can
# read the source can use them. Consider rotating them and loading the values
# from settings or the environment instead.
tweepy_consumer_key = 'ZKx8Yg55evn1U65vRWQ0Zj7Jr'
tweepy_consumer_secret = '26OYZDNj0hC17ei6JplHuerzoaxokQBpU9X2dsegkLLCShBK2y'
tweepy_access_token = '14404065-baBGgZmVoCnZEU1L0hCVq6ed6qHDFXVrLSQpAKXcw'
tweepy_access_token_secret = '3jbRjcgZV82OGLOsxv9Xg8G29h1oc9l9kqKTMXH4vEPNi'


#Classes for storing twitter data
class TwitterPerson(User, models.Model):
    """
    A Twitter account, stored as a subclass of Django's ``User``.

    Besides the profile fields it knows how to scrape its own tweets into
    ``TwitterPost`` rows and how to build a Markov-generated post from them.
    """

    real_name = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    avatar = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    happiness = models.IntegerField(default=0)

    def __str__(self):
        """Return the account's username."""
        return self.username

    def scrape(self):
        """
        Scrape this user's tweets with TweepyScraper and store them.

        Up to 100 tweets are requested for ``self.username``. Empty tweets
        and retweets (tweets containing the token ``RT``) are skipped. In
        each remaining tweet the uncommon words are saved separately and
        replaced with dummy words:

        * words containing ``@`` are saved as TwitterMention -> ``@user``
        * words starting with ``http`` are saved as TwitterLink -> ``link``
        * words starting with ``#`` are saved as TwitterHashtag -> ``#tag``

        Each stored TwitterPost also gets a key derived from the MD5 of the
        original tweet text, so a composite post can be traced back to the
        tweet it was generated from.

        Returns None.
        """
        t = TweepyScraper(tweepy_consumer_key,
                          tweepy_consumer_secret,
                          tweepy_access_token,
                          tweepy_access_token_secret)

        tweets = t.get_tweets_from_user(self.username, 100)
        for tweet in tweets:
            words = tweet.split()

            # Skip empty tweets and retweets. The retweet marker is matched
            # as a whole token so that words such as "SMART" do not count.
            if not words or 'RT' in words:
                continue

            cleaned_words = []
            for word in words:
                # get_or_create stores each distinct mention/link/tag once,
                # even though scrape() may run repeatedly for the same user.
                if "@" in word:
                    TwitterMention.objects.get_or_create(author=self, content=word)
                    word = "@user"
                elif word.startswith("http"):
                    TwitterLink.objects.get_or_create(author=self, content=word)
                    word = "link"
                elif word.startswith("#"):
                    TwitterHashtag.objects.get_or_create(author=self, content=word)
                    word = "#tag"
                cleaned_words.append(word)
            final_tweet = " ".join(cleaned_words)

            digest = hashlib.md5(tweet.encode('utf-8').strip()).hexdigest()
            # TwitterPost.hex_key is a BigIntegerField, which cannot hold a
            # 32-character hex string; fold the 128-bit digest into a
            # non-negative 63-bit integer instead.
            hex_key = int(digest, 16) % (2 ** 63)

            # get_or_create already saves the row; no separate save() call
            # is needed.
            TwitterPost.objects.get_or_create(author=self,
                                              content=final_tweet,
                                              hex_key=hex_key)

    def apply_markov_chains(self):
        """
        Generate one Markov post from this user's stored tweets and save it.

        All words of the user's TwitterPost rows are fed to ``Markov``; the
        generated sequence of ``(content, original_tweet_id)`` pairs is saved
        as one TwitterPostMarkov with a TwitterPostMarkovPart per pair.
        Nothing is saved when the generator returns an empty result.

        Returns None.
        """
        words = []
        for twitter_post in self.twitterpost_set.all():
            # ``content`` is nullable; treat None as an empty tweet instead
            # of feeding the literal word "None" to the generator.
            words.extend((twitter_post.content or '').split())

        m = Markov()
        m.words = words
        m.database()
        markov_twitter_post = m.generate_markov_twitter_post()
        if not markov_twitter_post:
            # Avoid storing a parent post that has no parts.
            return

        parent = TwitterPostMarkov.objects.create(author=self)
        for pair in markov_twitter_post:
            # create() rather than get_or_create(): a generated post may
            # contain the same (content, id) pair more than once, and each
            # occurrence must be kept.
            TwitterPostMarkovPart.objects.create(
                parent_post=parent,
                content=pair[0],
                original_tweet_id=pair[1])

class TwitterPost(models.Model):
    """A stored tweet belonging to a TwitterPerson."""

    author = models.ForeignKey(TwitterPerson, default=None, null=True)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    # Key that TwitterPerson.scrape() derives from an MD5 of the tweet text.
    hex_key = models.BigIntegerField()

    def __str__(self):
        """Return a two-line dump of the author and the content."""
        # ``content`` is nullable, so it goes through str() like ``author``;
        # concatenating it raw raised TypeError when it was None.
        return (
            ' author: ' + str(self.author) + '\n' +
            ' content: ' + str(self.content)
        )

class TwitterPostMarkov(models.Model):
    """A generated post; its text lives in TwitterPostMarkovPart rows."""

    author = models.ForeignKey(TwitterPerson, default=None, null=True)

    def __str__(self):
        """Return the author plus the concatenated content of all parts."""
        all_parts = TwitterPostMarkovPart.objects.filter(parent_post__id=self.id)
        # Part content is nullable; a None part contributes nothing instead
        # of raising TypeError during concatenation.
        content = ''.join(part.content or '' for part in all_parts)

        return (
            ' author: ' + str(self.author) + '\n' +
            ' content: ' + content
        )

class TwitterPostMarkovPart(models.Model):
    """One piece of a TwitterPostMarkov, with the id of its source tweet."""

    parent_post = models.ForeignKey(TwitterPostMarkov)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)
    original_tweet_id = models.IntegerField(default=0)

    def __str__(self):
        """Return the part's content, or an empty string when it is unset."""
        # __str__ must return a string, and ``content`` is nullable.
        return self.content or ''

class TwitterLink(models.Model):
    """A link word taken out of one of a TwitterPerson's tweets."""

    author = models.ForeignKey(TwitterPerson)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)

    def __str__(self):
        """Return the stored link, or an empty string when it is unset."""
        # __str__ must return a string, and ``content`` is nullable.
        return self.content or ''

class TwitterHashtag(models.Model):
    """A hashtag word taken out of one of a TwitterPerson's tweets."""

    author = models.ForeignKey(TwitterPerson)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)

    def __str__(self):
        """Return the stored hashtag, or an empty string when it is unset."""
        # __str__ must return a string, and ``content`` is nullable.
        return self.content or ''

class TwitterMention(models.Model):
    """A mention word taken out of one of a TwitterPerson's tweets."""

    author = models.ForeignKey(TwitterPerson)
    content = models.CharField(max_length=1000, default='PLACEHOLDER', null=True)

    def __str__(self):
        """Return the stored mention, or an empty string when it is unset."""
        # __str__ must return a string, and ``content`` is nullable.
        return self.content or ''

def scrape_top_twitter_people(self):
    """
    Mirror the users reported by ``TweepyScraper.scrape_top_users(50)``.

    For each returned entry a TwitterPerson is looked up or created by the
    entry's 'uname', then its ``real_name`` and ``avatar`` are overwritten
    from the entry's 'name' and 'avatar' values and the row is saved.

    Returns the scraper's result unchanged.
    """
    scraper = TweepyScraper(
        tweepy_consumer_key,
        tweepy_consumer_secret,
        tweepy_access_token,
        tweepy_access_token_secret,
    )
    top_users = scraper.scrape_top_users(50)

    for info in top_users:
        # get_or_create returns an (object, created) pair; only the object
        # is needed here.
        person, _created = TwitterPerson.objects.get_or_create(
            username=info['uname'])
        person.real_name = info['name']
        person.avatar = info['avatar']
        person.save()

    return top_users
# Attach the function to Django's User model under the name
# 'scrape_top_twitter_people' so it can be called as a method on User
# instances (the views call request.user.scrape_top_twitter_people()).
User.add_to_class('scrape_top_twitter_people', scrape_top_twitter_people)
@@ -3,8 +3,8 @@
from django.http import HttpResponse
from django.template import RequestContext, loader
from django.contrib.auth.models import User
from scrapers.models import TwitterPerson, FacebookPerson
from .models import FacebookPost, TwitterPost, TwitterPostMarkov, TwitterPostMarkovPart
from scrapers.models.twitter import TwitterPerson, TwitterPost, TwitterPostMarkov, TwitterPostMarkovPart
from scrapers.models.facebook import FacebookPerson, FacebookPost

from constants import *

@@ -16,7 +16,7 @@ def home(request):
return render_to_response('scrapers/post_index.html', context_instance=context)

if(request.GET.get('collect_twitter_data')):
#request.user.scrape_top_twitter_people()
request.user.scrape_top_twitter_people()
twitter_people = TwitterPerson.objects.all()
context = RequestContext(request, {'request': request, 'user': request.user, 'twitter_people': twitter_people})
return render_to_response('scrapers/twitter_people.html', context_instance=context)
@@ -48,15 +48,19 @@ def twitter_people(request):

def twitter_person_detail(request, twitter_person_username):
template = loader.get_template('scrapers/twitter_person_detail.html')
author = TwitterPerson.objects.filter(username=twitter_person_username)[0]
author = TwitterPerson.objects.get_or_create(username=twitter_person_username)[0]

author.scrape()
author.apply_markov_chains()

#request.user.scrape_twitter_person(twitter_person_username)
twitter_posts = TwitterPost.objects.filter(author=author)
twitter_posts = [t.content for t in twitter_posts]
twitter_posts_markov = TwitterPostMarkov.objects.filter(author=author)

request.user.apply_markov_chains(author, twitter_posts)
twitter_posts = [t.content for t in twitter_posts]

#TODO - this is not returning anything...
twitter_posts_markov_objects = TwitterPostMarkov.objects.filter(author=author)

twitter_posts_markov = []
for post in twitter_posts_markov_objects:
all_parts = TwitterPostMarkovPart.objects.filter(parent_post__id=post.id)
@@ -66,6 +70,10 @@ def twitter_person_detail(request, twitter_person_username):

twitter_posts_markov.append(complete_post)

print twitter_posts
print twitter_posts_markov
print colors

context = RequestContext(request, {
'uname': twitter_person_username,
'twitter_posts' : twitter_posts,