In [None]:
import pandas as pd
import os
import time
import tweepy
import json
import csv
import sqlite3
import pickle
import datetime as dt
import warnings
#from prompt_toolkit import print_formatted_text, HTML

In [None]:
# Reference: Python sqlite3 module documentation - https://docs.python.org/3/library/sqlite3.html
class Sqlite3Db:
    """Thin convenience wrapper around a ``sqlite3`` connection and cursor.

    Attributes
    ----------
    conn : the open ``sqlite3.Connection``, or ``None`` when not connected.
    cursor : a cursor created on ``conn``, or ``None`` when not connected.

    The object can be used in a ``with`` statement; the value bound by
    ``as`` is the underlying connection, and the database is committed and
    closed when the block exits.
    """

    def __init__(self, name=None):
        """Create the wrapper and, when ``name`` is truthy, open that database."""
        self.conn = None
        self.cursor = None

        if name:
            self.open(name)

    def open(self, name):
        """Connect to the database ``name`` and create a cursor on it.

        Connection errors are reported with ``print`` instead of being
        raised; in that case ``conn`` and ``cursor`` keep their old values.
        """
        try:
            self.conn = sqlite3.connect(name, detect_types=sqlite3.PARSE_DECLTYPES)
            self.cursor = self.conn.cursor()
        except sqlite3.Error as e:
            # f-string formatting avoids a secondary TypeError/IndexError when
            # ``name`` is not a str or the exception carries no arguments.
            print(f"Error connecting to database {name} with message:{e}")

    def close(self):
        """Commit pending changes and close the cursor and the connection.

        Calling this when nothing is open (including a second call in a row)
        is a no-op.
        """
        if self.conn is None:
            return
        try:
            self.conn.commit()
            if self.cursor is not None:
                self.cursor.close()
        finally:
            # Always release the connection and forget the handles, so that a
            # repeated close() does not operate on a closed database.
            self.conn.close()
            self.conn = None
            self.cursor = None

    def query(self, sqlStm, sqlStmPrm=None):
        """Execute ``sqlStm`` on the cursor, binding ``sqlStmPrm`` when given.

        Results are not returned; read them from ``self.cursor``.
        """
        if sqlStmPrm is not None:
            self.cursor.execute(sqlStm, sqlStmPrm)
        else:
            self.cursor.execute(sqlStm)

    # __enter__/__exit__ make the object usable with the ``with`` statement.
    def __enter__(self):
        """Return the underlying connection (not the wrapper itself)."""
        return self.conn

    def __exit__(self, exc_type, exc_value, traceback):
        """Commit and close on leaving the ``with`` block, even after an error."""
        self.close()

In [1]:
class CustomComputation:
    """Helpers for training LDA topic models and ranking their scores."""

    def __init__(self, version):
        """Store ``version``, which is only used in the string representation."""
        self.version = version

    def __str__(self):
        return f"Custom Computation version {self.version}"

    def compute_coherence_values(self, dictionary, corpus, texts
                                 , limit, start=2, step=3, chunksize=2000
                                 , passes=1, alpha='symmetric', Decay = 0.5
                                 , iterations=50):
        """
        Train one LDA model per topic count and score each of them.

        Topic counts are taken from ``range(start, limit, step)``.

        Parameters:
        ----------
        dictionary : Gensim dictionary
        corpus : Gensim corpus
        texts : List of input texts
        limit : Upper bound (exclusive) on the number of topics
        start : First number of topics to try
        step : Increment between successive numbers of topics
        passes : Forwarded to LdaMulticore
        alpha : Forwarded to LdaMulticore
        chunksize, Decay, iterations : Accepted for interface compatibility
            but currently NOT forwarded to the model.

        Returns:
        -------
        model_list_topic : List of trained LDA topic models
        coherence_values_topic : c_v coherence of each model
        num_topic_lst : Number of topics used for each model
        model_perplexity_lst : ``log_perplexity(corpus)`` of each model

        All four lists are aligned index by index.
        """
        coherence_values_topic = []
        model_list_topic = []
        num_topic_lst = []
        model_perplexity_lst = []

        for num_topics in range(start, limit, step):
            model = LdaMulticore(corpus=corpus, num_topics=num_topics, id2word=dictionary
                                 ,passes=passes
                                 ,alpha=alpha
                                )
            model_list_topic.append(model)
            coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
            coherence_values_topic.append(coherencemodel.get_coherence())
            num_topic_lst.append(num_topics)
            model_perplexity_lst.append(model.log_perplexity(corpus))

        return model_list_topic, coherence_values_topic, num_topic_lst, model_perplexity_lst

    def select_sorted_value(self, df, col_name, top_num_topics):
        """
        Select the rows whose ``col_name`` value is closest to its median.

        Parameters:
        ----------
        df : pandas DataFrame containing the column ``col_name``
        col_name : Name of the column to rank by
        top_num_topics : Number of rows to keep; applied as the slice
            ``[:top_num_topics]`` on the ranking, so a negative value drops
            that many rows from the far end instead.

        Returns:
        -------
        df_sorted : Rows of ``df`` ordered by increasing absolute distance of
            ``col_name`` from that column's median, truncated as described.
        """
        # Rank against the median of the requested column, not a fixed
        # "coherence" column, so the method works for any ``col_name``.
        column = df[col_name]
        distance_from_median = (column - column.median()).abs()
        ranking = distance_from_median.argsort()[:top_num_topics]
        df_sorted = df.iloc[ranking]
        return df_sorted