# Extract, Clean, and Load Data Scripts

In [1]:
# Import required libraries
import os, fnmatch
import json
from bson import ObjectId
import pprint
import time

# Libraries for Mongo
import pymongo

# Libraries for Neo4j
from neo4j.v1 import GraphDatabase
from neo4j.v1 import exceptions

# Libraries for ElasticSearch
from elasticsearch import Elasticsearch

# Geohashing
# http://www.willmcginnis.com/2016/01/16/pygeohash-1-0-1-fast-gis-geohash-python/
import pygeohash as pgh

In [2]:
# Import database passwords
import secrets

In [3]:
data_folder = '/Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/'
logs_folder = '/Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/logs/'

## Extractor

In [4]:
class Extractor:
    ''' Takes a folder name and a logs directory path and initializes a log file containing the name of 
    every file in the target folder.  Contains methods for checking which files in the log have not yet 
    been loaded and getting and reading in the next available file. '''
    
    def __init__(self, data_path, logs_path):
        self.data_path = data_path
        self.logs_path = logs_path
        
        # Create a directory to store the log files, if necessary
        logs_dir = os.path.dirname(self.logs_path)
        if not os.path.exists(logs_dir):
            os.makedirs(logs_dir)
        
        # Create a 'files_to_load.txt' file, then write the name of every file in the directory to this file
        files_to_load_log  = open(self.logs_path + "/files_to_load.txt", "w")
        data_files_list = os.listdir(self.data_path) 
        file_type = "*.txt"  
        for file in data_files_list:  
            if fnmatch.fnmatch(file, file_type):
                files_to_load_log.write(file)
                files_to_load_log.write("\n")
        files_to_load_log.close()     

    def next_file_available(self):
        ''' Checks if there's another file available in the files_to_load.txt log. If there's a 
        file available, returns True. If no files are remaining, returns False so we can stop 
        reading in data. '''
        files_to_load_log  = open(self.logs_path + "/files_to_load.txt", "r")
        next_file_name = files_to_load_log.readline().rstrip("\n")
        if next_file_name == '':
            return(False)
        else:
            return(True)
        
    def get_next_file(self):
        ''' Reads from the files_to_load.txt file and gets the name of the next file in the list.
        Calls read_file() to read in the target file in as list of dictionaries. Returns a tuple 
        that includes the list of dictionaries representing the JSON data, along with the filename
        so we can keep track of this file in subsequent tasks. '''
        files_to_load_log  = open(self.logs_path + "/files_to_load.txt", "r")
        next_file_name = files_to_load_log.readline().rstrip("\n") # strip the newline character from the end of filename
        print("Extractor: Next file is: " + next_file_name)
        next_file_path = self.data_path + next_file_name 
        return(self.read_data_file(next_file_path), next_file_name)
        
    def read_data_file(self, file_to_read):
        ''' Reads the JSON-formatted file line by line and returns each line as a dictionary. '''
        print("Extractor: Reading file: " + file_to_read)
        reading_file = open(file_to_read, "r") # open as read-only
        list_of_jsondicts = []
        for line in reading_file.readlines():
            list_of_jsondicts.append(json.loads(line))
        print("Extractor: Read " + str(len(list_of_jsondicts)) + " data rows.")
        return(list_of_jsondicts)

## Cleaner

In [5]:
class Cleaner: 
    ''' Takes a batch of data that's been extracted as a list of dictionaries.  Contains methods to 
    iterate over each record in the list, running it through a series of cleaning steps. Logs the
    ids of the records that are affected by the various cleaning steps. Returns the cleaned data back 
    as a list. '''
    
    def __init__(self, data_list, file_name, logs_path):
        self.data_list = data_list
        self.logs_path = logs_path
        self.file_name = file_name
        
    def clean_data(self):
        ''' Iterates over each data element, progressing through each cleaning step on each element. 
        Logs the ids of data elements that contain nulls and/or errors to arrays as we go. At the end
        of cleaning, invokes the log_cleaning() method to '''
        
        step1_log = []
        step2_log = []
        
        #i = 0
        for record in self.data_list:
            #print(i)
            self.fix_null_places(record, step1_log)
            self.fix_bounding_box(record, step2_log)
            self.get_centroid(record)
            #i += 1
        
        print("Cleaner: Finished cleaning records.")
        self.log_cleaning(step1_log, step2_log)
        return(self.data_list)
        
    def fix_null_places(self, record, log_array):
        ''' Since place values are critical to our data model, substitute dummy 
        values if we have a place value that equals 'None'. This will keep it from 
        blowing up the database when we try to insert. '''
        
        def set_none_place():
            record['place'] = dict()
            record['place']['id'] = "9999999"
            record['place']['name'] = "No Place"
            record['place']['full_name'] = "No Place Available"
            record['place']['country'] = "No Country Available"
            record['place']['country_code'] = "ZZ"
            record['place']['place_type'] = "NA"
            record['place']['url'] = "NA"
            record['place']['bounding_box'] = dict() # initialize dictionary to hold bounding box
            record['place']['bounding_box']['type'] = "Polygon"
            record['place']['bounding_box']['coordinates'] = list() # initialize coordinate list w/in bounding box
            record['place']['bounding_box']['coordinates'].append([]) # append the [0] element to hold four pairs of coordinates
            record['place']['bounding_box']['coordinates'][0].append([0.0, 0.0]) # append 'dummy' coordinates
            record['place']['bounding_box']['coordinates'][0].append([0.0, 0.0])
            record['place']['bounding_box']['coordinates'][0].append([0.0, 0.0])
            record['place']['bounding_box']['coordinates'][0].append([0.0, 0.0])
            log_array.append(record["id_str"])
        
        # Check if the record has a 'place' attribute 
        # Note: on very rare occasions, this attribute simply doesn't exist in the tweet
        if 'place' in record:
            # If record has a 'place' attribute, but it's set to 'None', then go and add dummy values
            if record['place'] is None:
                set_none_place()
            pass # if a place is present and its value is not None, then move along without changing/setting its values
        else: 
            # If record doesn't have a 'place' attribute, then initialize one before adding dummy values
            record['place'] = None
            set_none_place()    
       
    def fix_bounding_box(self, record, log_array):
        ''' Fix a few issues that are going on with bounding boxes:
        1. Twitter Place bounding boxes only have four points. Need to close them off so they're a 
        complete polygon. Take the first coordinate of the bounding box array and repeat it at the 
        end of the bounding box array.
        2. If the bounding box is actually a point (i.e. all of the four points are the same), then 
        "fake out" a bounding box by transforming into a small rectangle with a small buffer around 
        the point.  We can recognize these by looking for place.place_type == 'poi'. '''
        
        #print(record['id_str'])
        original_bounding_box = record['place']['bounding_box']['coordinates'][0].copy()
        #print(original_bounding_box)
        #print(record['place']['place_type'])
        
        if (record['place']['place_type'] == 'poi' or record['place']['place_type'] == 'NA'):
            point_bounding_box = [[None for x in range(2)] for y in range(5)]
            point_bounding_box[0][0] = original_bounding_box[0][0] - 0.0001
            point_bounding_box[0][1] = original_bounding_box[0][1] - 0.0001
            point_bounding_box[1][0] = original_bounding_box[1][0] - 0.0001
            point_bounding_box[1][1] = original_bounding_box[1][1] + 0.0001
            point_bounding_box[2][0] = original_bounding_box[2][0] + 0.0001
            point_bounding_box[2][1] = original_bounding_box[2][1] + 0.0001
            point_bounding_box[3][0] = original_bounding_box[3][0] + 0.0001
            point_bounding_box[3][1] = original_bounding_box[3][1] - 0.0001
            point_bounding_box[4][0] = original_bounding_box[0][0] - 0.0001
            point_bounding_box[4][1] = original_bounding_box[0][1] - 0.0001
            record['place']['better_bounding_box'] = dict()
            record['place']['better_bounding_box']['type'] = "Polygon"
            record['place']['better_bounding_box']['coordinates'] = list()
            record['place']['better_bounding_box']['coordinates'].append([])
            record['place']['better_bounding_box']['coordinates'][0] = point_bounding_box
            #print(record['place']['better_bounding_box']['coordinates'])
            log_array.append(record["id_str"])
        else:
            first_coords = original_bounding_box[0]
            #print(first_coords)
            original_bounding_box.append(first_coords)
            #print(original_bounding_box)
            record['place']['better_bounding_box'] = dict()
            record['place']['better_bounding_box']['type'] = "Polygon"
            record['place']['better_bounding_box']['coordinates'] = list()
            record['place']['better_bounding_box']['coordinates'].append([])
            record['place']['better_bounding_box']['coordinates'][0] = original_bounding_box
            #print(record['place']['better_bounding_box']['coordinates'])
                  
    def get_centroid(self, record):
        bounding_box = record['place']['better_bounding_box']['coordinates'][0];
        lower_left = bounding_box[0];
        upper_right = bounding_box[2];
        centroid_long = lower_left[0] + ((upper_right[0] - lower_left[0]) / 2);
        centroid_lat = lower_left[1] + ((upper_right[1] - lower_left[1]) / 2);
        record['place']['centroid'] = dict()
        record['place']['centroid']['type'] = "Point"
        record['place']['centroid']['coordinates'] = [centroid_long, centroid_lat]
        
        record['place']['centroid_geohash'] = pgh.encode(centroid_lat, centroid_long, precision=12)
        
        
    def log_cleaning(self, step1_log, step2_log):
        ''' When cleaning is done, put the cleaning log arrays into a dictionary and write the result 
        to the cleaning log file. '''
        
        log_dict = dict()
        log_dict['file_name'] = self.file_name
        log_dict['null_places_fixed'] = step1_log
        log_dict['bounding_boxes_fixed'] = step2_log
        
        cleaning_log  = open(self.logs_path + "/cleaning_log.txt", "a+") # open file in append mode
        cleaning_log.write(json.dumps(log_dict))
        cleaning_log.write("\n")
        cleaning_log.close()

## Loader

In [6]:
class Loader:
    ''' 
    Contains general methods for initializing a database connection, loading data by interating over
    records and writing them one by one to the database, and logging data about the number of successful
    and failed loads to a log file. When setting up the database connection, this class invokes other
    database-specific loader classes that contain all required methods to "plug and play" with this 
    generic loader class (ex: initialize_connection(), load_record(), etc.). '''
    
    def __init__(self, data_list, file_name, logs_path):
        self.data_list = data_list
        self.logs_path = logs_path
        self.file_name = file_name
        self.db_connection = None
    
    def get_connection(self, db_type, db_host, db_port, username=None, pwd=None, db_name=None, collection_name=None):
        if db_type == "mongodb":
            self.db_connection = MongoDBLoader(db_host, db_port, username, pwd, db_name, collection_name)
            self.db_connection.initialize_connection()
        if db_type == "neo4j":
            self.db_connection = Neo4jLoader(db_host, db_port, username, pwd)
            self.db_connection.initialize_connection()
        if db_type == "elasticsearch":
            self.db_connection = ElasticSearchLoader(db_host, db_port, db_name)
            self.db_connection.initialize_connection()   
    
    def load_data(self):
        # Initialize variables we want to count so we can output them to the log file at the end of load
        begin = time.time()
        i = 0
        success_count = 0
        fail_count = 0
        fail_log = []
        
        print("Loader: Loading records...")
        for record in self.data_list:
            #print(i)
            #print("Loading record with id: " + record['id_str'] + "; User id: " + str(record['user']['id']) + "; Place id: " + str(record['place']['id']))
            try:
                self.db_connection.load_record(record)
                success_count += 1
                #print("Loaded!")
            except Exception as e:
                print("Couldn't load record with id: " + record['id_str'])
                #print(e)
                fail_count += 1
                fail_dict = dict()
                fail_dict['id'] = record['id_str']
                fail_dict['error'] = str(e)
                fail_log.append(fail_dict)      
            #i += 1
        
        print("Loader: Finished loading records.")
        end = time.time()
        load_time = end - begin # compute time elapsed for load
        self.db_connection.close_connection() # close database connection
        self.log_load(load_time, success_count, fail_count, fail_log) # write load results to log
    
    def load_batch_data(self):
        begin = time.time()
        
        self.db_connection.load_batch(self.data_list)
        
        end = time.time()
        load_time = end - begin # compute time elapsed for load
        self.db_connection.close_connection() # close database connection
        self.log_load(load_time, success_count, fail_count, fail_log) # write load results to log
        
    
    def log_load(self, load_time, success_count, fail_count, fail_log):
        ''' When load is done, record the time it took to run, number of successes, and number of failures.
        Write this info, along with file_name, as a JSON string to a log file. Then remove the name of the 
        successfully loaded file from files_to_load.txt so we don't try to re-load it on the next iteration. ''' 
        
        log_dict = dict()
        log_dict['file_name'] = self.file_name
        log_dict['load_time'] = load_time
        log_dict['success_count'] = success_count
        log_dict['fail_count'] = fail_count
        log_dict['fail_log'] = fail_log
        loaded_files_log  = open(self.logs_path + "/loaded_files.txt", "a+") # open file in append mode
        loaded_files_log.write(json.dumps(log_dict))
        loaded_files_log.write("\n")
        loaded_files_log.close()
        
        # https://www.reddit.com/r/learnpython/comments/3xuych/least_resource_intensive_way_to_delete_first_line/
        files_to_load_log  = open(self.logs_path + "/files_to_load.txt", "r+") # open in read/write mode
        files_to_load_log.readline() # read the first line and throw it out
        remaining_files = files_to_load_log.read() # read the rest
        files_to_load_log.seek(0) # set the cursor to the top of the file
        files_to_load_log.write(remaining_files) # write the data back
        files_to_load_log.truncate() # set the file size to the current size

### MongoDBLoader

http://api.mongodb.com/python/current/examples/bulk.html

In [20]:
class MongoDBLoader:
    
    def __init__(self, db_host, db_port, username, pwd, db_name, collection_name):
        self.connection = None
        self.client = None
        self.username = username
        self.pwd = pwd
        self.db_host = db_host
        self.db_port = db_port
        self.db_name = db_name
        self.collection_name = collection_name
    
    def initialize_connection(self):
        self.client = pymongo.MongoClient('mongodb://' + self.username + ':' + self.pwd + '@' + self.db_host + ':' + self.db_port)
        target_db = self.client[self.db_name]
        target_collection = target_db[self.collection_name]
        
        # Initialize index on tweet 'id' field so we throw an error when trying to load duplicates of the same tweet
        target_collection.create_index([("id", pymongo.ASCENDING)], name='id_index', unique=True) 
        self.connection = target_collection
    
    def load_record(self, record):
        # Change data types as necessary
        record['id_str'] = str(record['id_str'])
        record['timestamp_ms'] = int(record['timestamp_ms'])
        #record_id = str(record['id_str'])
        #self.connection.update_one({id: record_id}, {"$set": {"id": record_id}}, upsert=True)
        self.connection.insert_one(record)
    
    def close_connection(self):
        self.client.close()

### Neo4jLoader

https://neo4j.com/developer/python/

https://www.lynda.com/Neo4j-tutorials/Use-Neo4j-driver-Python/601789/659331-4.html

In [7]:
class Neo4jLoader:
    
    def __init__(self, db_host, db_port, username, pwd):
        self.connection = None
        self.username = username
        self.pwd = pwd
        self.db_host = db_host
        self.db_port = db_port
        
        self.neo4j_query_string = """
            MERGE (t:Tweet {tweet_id: toInteger($tweet_id)})
            ON CREATE SET t.text = $text,
                t.lang = $lang,
                t.timestamp_ms = toInteger($timestamp_ms),
                t.favorited = $favorited,
                t.retweeted = $retweeted,
                t.retweet_count = toInteger($retweet_count),
                t.favorite_count = toInteger($favorite_count),
                t.quote_count = toInteger($quote_count),
                t.reply_count = toInteger($reply_count),
                t.coordinates = point({ 
                    longitude: toFloat($tweet_coordinates_long), 
                    latitude: toFloat($tweet_coordinates_lat) 
                })

            MERGE (u:User {user_id: toInteger($user_id)})
            SET	u.name = $user_name,
                u.screen_name = $user_screen_name,
                u.description = $user_description,
                u.location = $user_location,
                u.lang = $user_lang,
                u.time_zone = $user_time_zone,
                u.verified = $user_verified,
                u.utc_offset = $user_utc_offset,
                u.created_at = $user_created_at,
                u.listed_count = $user_listed_count,
                u.friends_count = $user_friends_count,
                u.followers_count = $user_followers_count,
                u.favourites_count = $user_favourites_count,
                u.is_translator = $user_is_translator,
                u.statuses_count = $user_statuses_count


            MERGE (t)-[:TWEETED_BY]->(u)
            MERGE (u)-[:TWEETED]->(t)

            MERGE (p:Place {place_id: toString($place_id), latitude: toFloat($place_centroid_lat), longitude: toFloat($place_centroid_long)})
            SET	p.name = $place_name,
                p.full_name = $place_full_name,
                p.country = $place_country,
                p.country_code = $place_country_code,
                p.place_type = $place_type,
                p.bounding_box_LL = point({ 
                    longitude: toFloat($place_bounding_box_LL_long), 
                    latitude: toFloat($place_bounding_box_LL_lat) 
                }),
                p.bounding_box_UR = point({ 
                    longitude: toFloat($place_bounding_box_UR_long), 
                    latitude: toFloat($place_bounding_box_UR_lat) 
                }),
                p.centroid = point({ 
                    longitude: toFloat($place_centroid_long), 
                    latitude: toFloat($place_centroid_lat) 
                })
            
            MERGE (t)-[:LOCATED_AT]->(p)

            WITH t, $entities_user_mentions AS mentions
            UNWIND mentions AS mention
                MERGE (mentioned_user:User {user_id: toInteger(mention.id), name: mention.name, screen_name: mention.screen_name})
                MERGE (t)-[:MENTIONS]->(mentioned_user)

            WITH t, $entities_hashtags AS hashtags
            UNWIND hashtags AS hashtag
                MERGE (h:Hashtag {hashtag_id: hashtag.text})
                MERGE (t)-[:HASHTAGS]->(h)
            """
    
    def initialize_connection(self):
        # Initialize Neo4j driver and start a session
        uri = 'bolt://' + self.db_host + ':' + self.db_port
        driver = GraphDatabase.driver(uri, auth=(self.username, self.pwd))
        self.connection = driver.session()
        
        # Start a session and add uniqueness constraints and indexes
        session = self.connection.session()
        self.connection.run("CREATE INDEX ON :Tweet(tweet_id)")
        self.connection.run("CREATE INDEX ON :User(user_id)")
        self.connection.run("CREATE INDEX ON :Place(place_id)")
        self.connection.run("CREATE INDEX ON :Hashtag(hashtag_id)")
        self.connection.run("CREATE INDEX ON :Place(centroid)") # spatial index
        #session.close()
    
    def load_record(self, record):
        try:
#             with self.connection.session() as session:
#                 tx = session.begin_transaction()
#                 #print(type(tx))
#                 db_result = self.structure_data_for_load(tx, record)
#                 tx.commit()
            tx = self.connection.begin_transaction()
            self.structure_data_for_load(tx, record)
            tx.commit()
        except Exception as e:
            print(e)               
                    
    #@staticmethod
    def structure_data_for_load(self, tx, data_element):
        tx.run(self.neo4j_query_string, parameters={
            'tweet_id': data_element['id_str'],
            'text': data_element['text'],
            'lang': data_element['lang'],
            'timestamp_ms': data_element['timestamp_ms'],
            'favorited': data_element['favorited'],
            'retweeted': data_element['retweeted'],
            'retweet_count': data_element['retweet_count'],
            'favorite_count': data_element['favorite_count'],
            'quote_count': data_element['quote_count'],
            'reply_count': data_element['reply_count'],
            'tweet_coordinates_long': data_element['coordinates']['coordinates'][0] if data_element['coordinates'] != None else None,
            'tweet_coordinates_lat': data_element['coordinates']['coordinates'][1] if data_element['coordinates'] != None else None,
            'user_id': data_element['user']['id'],
            'user_name': data_element['user']['name'],
            'user_screen_name': data_element['user']['screen_name'],
            'user_description': data_element['user']['description'],
            'user_location': data_element['user']['location'],
            'user_lang': data_element['user']['lang'],
            'user_time_zone': data_element['user']['time_zone'],
            'user_verified': data_element['user']['verified'],
            'user_utc_offset': data_element['user']['utc_offset'],
            'user_created_at': data_element['user']['created_at'],
            'user_listed_count': data_element['user']['listed_count'],
            'user_friends_count': data_element['user']['friends_count'],
            'user_followers_count': data_element['user']['followers_count'],
            'user_favourites_count': data_element['user']['favourites_count'],
            'user_is_translator': data_element['user']['is_translator'],
            'user_statuses_count': data_element['user']['statuses_count'],
            'place_id': data_element['place']['id'],
            'place_name': data_element['place']['name'],
            'place_full_name': data_element['place']['full_name'],
            'place_country': data_element['place']['country'],
            'place_country_code': data_element['place']['country_code'],
            'place_type': data_element['place']['place_type'],
            'place_bounding_box_LL_long': data_element['place']['better_bounding_box']['coordinates'][0][0][0],
            'place_bounding_box_LL_lat': data_element['place']['better_bounding_box']['coordinates'][0][0][1],
            'place_bounding_box_UR_long': data_element['place']['better_bounding_box']['coordinates'][0][2][0],
            'place_bounding_box_UR_lat': data_element['place']['better_bounding_box']['coordinates'][0][2][1],
            'place_centroid_long': data_element['place']['centroid']['coordinates'][0],
            'place_centroid_lat': data_element['place']['centroid']['coordinates'][1],
            'entities_user_mentions': data_element['entities']['user_mentions'],
            'entities_hashtags': data_element['entities']['hashtags']
        })
        
    def close_connection(self):
        self.connection.close()

### ElasticSearchLoader

https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html

In [7]:
class ElasticSearchLoader:
    
    def __init__(self, db_host, db_port, db_name):
        self.connection = None
        self.client = None
        self.db_host = db_host
        self.db_port = db_port
        self.db_name = db_name
    
    def initialize_connection(self):
        self.client = Elasticsearch([{'host': self.db_host, 'port': self.db_port}])
        
        # Check if we can connect successfully
        if self.client.ping(): 
            print("Connected to ElasticSearch instance.") 
            
            # If we're successfully connected, then create a new index (aka database) in ElasticSearch to hold the data
            if not self.client.indices.exists(self.db_name):
                # Define settings for the new index
                settings = {
                    "settings": {
                        "number_of_shards": 1,
                        "number_of_replicas": 0
                    },
                    "mappings": {
                        "tweet": {
                            "properties": {
                                "text": {
                                    "type": "text"
                                },
                                "timestamp_ms": {
                                    "type": "date"
                                },
                                "place": {
                                    "properties": {
#                                         "centroid": {
#                                             "type": "geo_point"
#                                         },
                                        "better_bounding_box": {
                                            "type": "geo_shape"
                                        },
                                        "centroid_geohash": {
                                             "type": "geo_point"
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                
                try:
                    self.client.indices.create(index=self.db_name, body=settings)
                    print("Created new ElasticSearch index named: " + self.db_name)
                except Exception as e:
                    print("Couldn't create new index because: ")
                    print(e)
        else:
            print("Couldn't connect to ElasticSearch instance.")
    
    def load_record(self, record):
        try:
            self.client.index(index=self.db_name, doc_type='tweet', body=record)
        except Exception as e:
            print(e)
    
    def close_connection(self):
        pass

## Putting it all together...

In [8]:
# Initialize extractor
extractor = Extractor(data_folder, logs_folder)

In [None]:
while extractor.next_file_available():
    next_file_data, next_file_name = extractor.get_next_file() # read in the next file
    cleaner = Cleaner(next_file_data, next_file_name, logs_folder)
    cleaned_data = cleaner.clean_data()
    loader = Loader(cleaned_data, next_file_name, logs_folder)
    #loader.get_connection("mongodb", secrets.mongodb_host, secrets.mongodb_port, secrets.mongodb_username, secrets.mongodb_pwd, db_name="twitter_small", collection_name="tweets")
    #loader.get_connection("neo4j", secrets.neo4j_host, secrets.neo4j_port, secrets.neo4j_username, secrets.neo4j_pwd)
    #loader.get_connection("neo4j", "localhost", "7687", "neo4j", "n0sql4m3")
    loader.get_connection("elasticsearch", secrets.elasticsearch_host, secrets.elasticsearch_port, db_name="twitter_medium")
    loader.load_data()

Extractor: Next file is: 5GB_unicode_splitgd.txt
Extractor: Reading file: /Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/5GB_unicode_splitgd.txt
Extractor: Read 5000 data rows.
Cleaner: Finished cleaning records.
Connected to ElasticSearch instance.
Created new ElasticSearch index named: twitter_medium
Loader: Loading records...
Loader: Finished loading records.
Extractor: Next file is: 5GB_unicode_splitfw.txt
Extractor: Reading file: /Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/5GB_unicode_splitfw.txt
Extractor: Read 5000 data rows.
Cleaner: Finished cleaning records.
Connected to ElasticSearch instance.
Loader: Loading records...
Loader: Finished loading records.
Extractor: Next file is: 5GB_unicode_splitel.txt
Extractor: Reading file: /Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/5GB_unicode_splitel.txt
Extractor: Read 5000 data rows.
Cleaner: Finished cleani

POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:911.522s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/linkalis/anaconda/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/Users/linkalis/anaconda/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/Users/linkalis/anaconda/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/linkalis/anaconda/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

Dur

ConnectionTimeout caused by - ReadTimeoutError(HTTPConnectionPool(host='192.168.0.15', port='9200'): Read timed out. (read timeout=10))


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.001s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 51] Network is unreachable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/c

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94048>: Failed to establish a new connection: [Errno 51] Network is unreachable) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94048>: Failed to establish a new connection: [Errno 51] Network is unreachable)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.012s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 51] Network is unreachable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/c

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94d30>: Failed to establish a new connection: [Errno 51] Network is unreachable) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94d30>: Failed to establish a new connection: [Errno 51] Network is unreachable)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.002s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 51] Network is unreachable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/c

ConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e19e8>: Failed to establish a new connection: [Errno 51] Network is unreachable) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e19e8>: Failed to establish a new connection: [Errno 51] Network is unreachable)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.004s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 51] Network is unreachable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/c

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94a90>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94a90>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.226s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfb70>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfb70>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.013s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError((<urllib3.connection.HTTPConnection object at 0x115f94ba8>, 'Connection to 192.168.0.15 timed out. (connect timeout=10)')) caused by: ConnectTimeoutError((<urllib3.connection.HTTPConnection object at 0x115f94ba8>, 'Connection to 192.168.0.15 timed out. (connect timeout=10)'))


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:3.036s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x1121547f0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x1121547f0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.008s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf128>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf128>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.059s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf860>: Failed to establish a new connection: [Errno 49] Can't assign requested address) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf860>: Failed to establish a new connection: [Errno 49] Can't assign requested address)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.008s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94358>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94358>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.010s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94198>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94198>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.118s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfe10>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfe10>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.017s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf278>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf278>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.011s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf358>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf358>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.552s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94b70>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94b70>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.010s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e11d0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e11d0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.008s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf3c8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf3c8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.007s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfef0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfef0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.009s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94a90>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94a90>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.007s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94860>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94860>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.021s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x112154be0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x112154be0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.111s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf828>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf828>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.006s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94b70>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94b70>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.012s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f945f8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f945f8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.049s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f946a0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f946a0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.004s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e16a0>: Failed to establish a new connection: [Errno 50] Network is down) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e16a0>: Failed to establish a new connection: [Errno 50] Network is down)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.004s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf358>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf358>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.009s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94438>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94438>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.086s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf748>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf748>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.243s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94470>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94470>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.010s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf4e0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf4e0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.006s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfe10>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccfe10>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.010s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94da0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94da0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.010s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f949b0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f949b0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.006s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e1320>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x1105e1320>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.005s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf7b8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf7b8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.043s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf780>: Failed to establish a new connection: [Errno 50] Network is down) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf780>: Failed to establish a new connection: [Errno 50] Network is down)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.006s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f940b8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f940b8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.503s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94ef0>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x115f94ef0>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.140s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 50] Network is down

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connecti

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf748>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf748>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.146s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [Errno 50] Network is down

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connecti

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf3c8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf3c8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:10.573s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
socket.timeout: timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connectionpool.py

ConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf7b8>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x116ccf7b8>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.119s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

ConnectionError(<urllib3.connection.HTTPConnection object at 0x11697ba90>: Failed to establish a new connection: [Errno 61] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x11697ba90>: Failed to establish a new connection: [Errno 61] Connection refused)


POST http://192.168.0.15:9200/twitter_medium/tweet [status:N/A request:0.006s]
Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/linkalis/anaconda/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 171, in perform_request
    response = self.pool.urlopen(method, url, body, retries=False, headers=request_headers, **kw)
  File "/Users/linkalis/anaconda/lib/python3.6/site-package

## JUNK

### Extractor

In [14]:
extractor = Extractor(data_folder, logs_folder)

In [33]:
next_file_data, next_file_name = extractor.get_next_file()

Extractor: Next file is: 5GB_unicode_splitgd.txt
Extractor: Reading file: /Users/linkalis/GIS8990_DistributedSpatialDatabases/testdatasets/data_split_medium_5000/5GB_unicode_splitgd.txt
Extractor: Read 5000 data rows.


In [37]:
print(next_file_data[100]['place'])
pprint.pprint(next_file_data[100])
print('True' if 'place' in next_file_data[100] else 'False')

{'id': '00d58bbe24ee6718', 'url': 'https://api.twitter.com/1.1/geo/id/00d58bbe24ee6718.json', 'name': 'Gaziantep', 'country': 'Türkiye', 'full_name': 'Gaziantep, Türkiye', 'attributes': {}, 'place_type': 'city', 'bounding_box': {'type': 'Polygon', 'coordinates': [[[37.300637, 36.997742], [37.300637, 37.126346], [37.473381, 37.126346], [37.473381, 36.997742]]]}, 'country_code': 'TR'}
{'contributors': None,
 'coordinates': None,
 'created_at': 'Mon Jan 01 11:27:10 +0000 2018',
 'entities': {'hashtags': [], 'symbols': [], 'urls': [], 'user_mentions': []},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 947791303517106176,
 'id_str': '947791303517106176',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'is_quote_status': False,
 'lang': 'tr',
 'place': {'attributes': {},
           'bounding_box': {'coordinates': [[[37.300637, 36.997

### Cleaner

In [16]:
cleaner = Cleaner(next_file_data, next_file_name, logs_folder)
cleaned_data = cleaner.clean_data()

Cleaner: Finished cleaning records.


In [19]:
#next_file_data[122]['id_str'] # has coordinates and a place
pprint.pprint(cleaned_data[576]['place'])
pprint.pprint(cleaned_data[4234]['place'])
pprint.pprint(cleaned_data[575]['place'])

{'better_bounding_box': {'coordinates': [[[-0.0001, -0.0001],
                                          [-0.0001, 0.0001],
                                          [0.0001, 0.0001],
                                          [0.0001, -0.0001],
                                          [-0.0001, -0.0001]]],
                         'type': 'Polygon'},
 'bounding_box': {'coordinates': [[[0.0, 0.0],
                                   [0.0, 0.0],
                                   [0.0, 0.0],
                                   [0.0, 0.0]]],
                  'type': 'Polygon'},
 'centroid': {'coordinates': [0.0, 0.0], 'type': 'Point'},
 'country': 'No Country Available',
 'country_code': 'ZZ',
 'full_name': 'No Place Available',
 'id': '9999999',
 'name': 'No Place',
 'place_type': 'NA',
 'url': 'NA'}
{'better_bounding_box': {'coordinates': [[[-0.0001, -0.0001],
                                          [-0.0001, 0.0001],
                                          [0.0001, 0.0001],
        

In [None]:
pprint.pprint(cleaned_data[1243])

### Loader

In [None]:
loader = Loader(cleaned_data, next_file_name, logs_folder)
loader.load_data()

In [None]:
# Testing Neo4j import

# Initialize Neo4j driver
uri = "bolt://linkylink.net:7687"
user = input("Username: ")
pwd = input("Password: ")
driver = GraphDatabase.driver(uri, auth=(user, pwd))

with driver.session() as session:
    result = session.run("MATCH (n:Tweet) RETURN n LIMIT 25")
    
    for record in result:
        print(record)

### Twurl API

In [None]:
# https://developer.twitter.com/en/docs/geo/places-near-location/api-reference/get-geo-reverse_geocode.html

twurl '/1.1/geo/reverse_geocode.json?lat=2.204446&long=102.189931&granularity=country'
twurl '/1.1/geo/reverse_geocode.json?lat=2.255562&long=102.250785&granularity=country'

In [27]:
es = Elasticsearch([{'host': secrets.elasticsearch_host, 'port': secrets.elasticsearch_port}])
print(es.ping())
es.indices.delete(index='twitter_small')

DELETE http://192.168.0.15:9200/twitter_small [status:404 request:0.006s]


True


NotFoundError: TransportError(404, 'index_not_found_exception', 'no such index')