In [40]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.decomposition import LatentDirichletAllocation as LDA
from nltk.corpus import stopwords

In [55]:
def load_segmented(path):
    # Load segmented text
    with open(path, 'r') as f:
        text = f.read()
        # Split text by delimiter
        text = text.split('\n==========\n')
        # Strip whitespace
        text = [x.strip() for x in text]
    
    return text

In [65]:
def topic_modelling(segmented_text, n_topics=1):
    cv = CountVectorizer(stop_words=stopwords.words('english'), lowercase=True)
    counts = cv.fit_transform([segmented[3]])

    tfidf = TfidfTransformer()
    x_tf = tfidf.fit_transform(counts)

    components = n_topics
    lda = LDA(n_components=components)
    lda_array = lda.fit_transform(x_tf)

    components = [lda.components_[i] for i in range(len(lda.components_))]
    features = cv.get_feature_names()
    important_words = [sorted(features, key = lambda x: components[j][features.index(x)], reverse = True)[:5] for j in range(len(components))]
    print(important_words)

In [56]:
segmented = load_segmented('../text_segmentation/transcripts_tiling.txt')

In [68]:
print(segmented[3])

But it was almost a good thing. I don't know. I feel like it almost made us realize how good our friendship was because we'd spend that time with him. What what where do ya were meant to be actually in contact with each other and why don't we take so long to rekindle out again? And then yeah, I'm pretty much just said to me next time you're in Perth if you have a job like dude come stay with me and I was like,

 I will actually take you up on the offer and I did and it was like nothing changed and it was like exactly the same as what we picked up where we left off. So, yeah, and it's really anything. I feel like we had a before years apart or five or whatever you want to say is yeah, we definitely needed it might we were both we both spend some time figuring out some things trying to figure out who has corny as it sounds who we were letting myself. Sorry. And yeah, and then yeah meeting back together. I feel like the Friendship we had was a more meaningful and sincere friendship than w

In [70]:
topic_modelling(segmented, n_topics=2)

[[ 3  3  1  1  1  1  2  1  1  1  1  1  1  1  1  1  1  3  1  1  1  3  3  1
   1  2  1  1  1  1  1  1  1  1  1  1  2  1  1  4  1  1  1  2  1  1  3  3
   1  1  2  1  2  1  2  1  1  7  1  3  1  1  3  1  1  6  1  1  1  1  1  1
   1  1  1  2  1  5  1  1  1  1  1  1 20  2  1  4  1  1  1  1  1  2  2  1
   1  1  1  1  2  1  1  1  1  2  1  1  1  1  1  2  1  3  1  3  1  1  1  1
   2  1  1  1  1  2  1  1  1  2  2  1  2  1  6  1  3  4  1  1  3  4  1  1
   1  1  3  1  1  2  1  1  2  1  1  1  2  1  1  3  1  6  1  1  5  1  2  1
   1  2  3  2  1  1  3  1  1  2  1 12  1  1  1]]
[[0.07999529 0.92000471]]
[['like', 'yeah', 'good', 'head', 'right'], ['like', 'yeah', 'good', 'think', 'head']]


In [67]:
segmented = load_segmented('../text_segmentation/sentence_segmentation.txt')