In [1]:
# importing the requests library
import requests
from langdetect import detect
from googletrans import Translator
import json
import os
import io
import emoji

In [2]:
# defining the api-endpoint 
API_URL = 'http://text-processing.com/api/sentiment/'

In [3]:
def get_sentiment_score(raw_tweet):
    print ("get_sentiment")
    full_text = raw_tweet['full_text']
    print(full_text)
    allchars = [str for str in full_text]
    emoji_list = [c for c in allchars if c in emoji.UNICODE_EMOJI]
    text = ' '.join([str for str in full_text.split() if not any(i in str for i in emoji_list)])
    print(text)
    
    language = detect(text)
    if(language != 'en'):
        translator = Translator()
        response = translator.translate(text)

    data = { "text": text }

    response = requests.post(url = API_URL, data = data)
    result = response.json()
#     print(result)
    return result

In [4]:
def get_raw_tweets_from_file(file_name):
    print ("get_raw_tweets_from_file")
    with open(file_name, mode='r', encoding='utf-8') as tweets_json:
        current_tweets = json.load(tweets_json)
        return current_tweets

In [5]:
def process_tweet(raw_tweet):
    print ("process_tweet")
    raw_tweet['sentiment'] = get_sentiment_score(raw_tweet)
    return raw_tweet

In [6]:
def create_file(new_file, tweet_array):
    print ("create_file")
    # create file 
    if not os.path.exists(new_file):
        open(new_file, 'w+').close()
        with open(new_file, mode='w', encoding='utf-8') as f:
            json.dump([], f)
    
    # append to file
    with open(new_file, mode='r', encoding='utf-8') as tweets_json:
        current_tweets = json.load(tweets_json)
        
    with open(new_file, mode='w', encoding='utf-8') as tweets_json:
        current_tweets = current_tweets + tweet_array
        json.dump(current_tweets, tweets_json)

In [7]:
def process_file(input_file_path, output_file_path):
    print('process_file')
    tweets = get_raw_tweets_from_file(input_file_path)
    processed_tweets = []
    for tweet in tweets:
        processed_tweets.append(process_tweet(tweet))
    create_file(output_file_path, processed_tweets)

In [8]:
def process_all_files(root_path):
    print('process_all_file')
    for root, dirs, files in os.walk(root_path):
        for file in files:
            input_file_path = root + '/' + file
            output_filename = file.replace('.json','_new.json')
            output_file_path = root + '/' + output_filename
            try:
                process_file(input_file_path, output_file_path)
                print('Successfully processed: ' + output_file_path)
            except:
                print('Error caused by:')
                print('File Name: ' + file)
                print('File Path: ' + output_file_path)
            print(" ")

In [9]:
ROOT_PATH = '/Users/sonalsingh/MSCS/1stSEM/IR/Projects/Project4/dev/Tweets'
process_all_files(ROOT_PATH)

process_all_file
process_file
get_raw_tweets_from_file
Error caused by:
File Name: .DS_Store
File Path: /Users/sonalsingh/MSCS/1stSEM/IR/Projects/Project4/dev/Tweets/.DS_Store
 
process_file
get_raw_tweets_from_file
process_tweet
get_sentiment
‡§™‡•ç‡§∞‡§¶‡•Ç‡§∑‡§£ ‡§∏‡•á ‡§ï‡§∞‡§æ‡§π‡§§‡•Ä ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§ï‡•á ‡§≤‡§ø‡§è ‡§ï‡•ç‡§Ø‡§æ ‡§ï‡§ø‡§Ø‡§æ ‡§ï‡•á‡§ú‡§∞‡•Ä‡§µ‡§æ‡§≤ ‡§∏‡§∞‡§ï‡§æ‡§∞ ‡§®‡•á‚Ä¶? https://t.co/BLuZQdIa55
‡§™‡•ç‡§∞‡§¶‡•Ç‡§∑‡§£ ‡§∏‡•á ‡§ï‡§∞‡§æ‡§π‡§§‡•Ä ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§ï‡•á ‡§≤‡§ø‡§è ‡§ï‡•ç‡§Ø‡§æ ‡§ï‡§ø‡§Ø‡§æ ‡§ï‡•á‡§ú‡§∞‡•Ä‡§µ‡§æ‡§≤ ‡§∏‡§∞‡§ï‡§æ‡§∞ ‡§®‡•á‚Ä¶? https://t.co/BLuZQdIa55
process_tweet
get_sentiment
"‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§ï‡§æ ‡§¶‡§¨‡§Ç‡§ó"  - my weekly video blog at @MyNation

‡§®‡§æ ‡§®‡§à ‡§¨‡§∏‡•á‡§Ç ‡§Ü‡§à
‡§®‡§æ ‡§è‡§Ø‡§∞ ‡§™‡•ç‡§Ø‡•Ç‡§∞‡§ø‡§´‡§æ‡§Ø‡§∞
‡§®‡§æ ‡§Æ‡•á‡§ï‡•á‡§®‡§æ‡§á‡§ú‡•ç‡§° ‡§ï‡•ç‡§≤‡•Ä‡§®‡§ø‡§Ç‡§ó ‡§π‡•Å‡§à
‡§®‡§æ ‡§™‡•â‡§≤‡•ç‡§Ø‡•Å‡§∂‡§® ‡§´‡•ç‡§∞‡•Ä ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä https://t.co/Yf4qmPQvLz
"‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§ï‡§æ ‡§¶‡§¨‡