### Import packages

In [78]:
import pandas as pd
from datetime import datetime
from collections import defaultdict,  Counter
import string
from IPython.display import Markdown, display

### Read stopwords

In [79]:
# Load the general stopword list (one entry per line). The `with` block closes
# the file as soon as the set is built instead of leaking the handle.
with open('stopwords.txt', encoding="utf8") as stopword_file:
    stopwords = {line.strip() for line in stopword_file}
# Extra words excluded on top of whatever the file provides.
stopwords = stopwords.union({'mr', 'mrs', 'one', 'two', 'said'})
# Second list, kept separate from the first; presumably Bengali words going by
# the file name -- confirm against the file contents.
with open('bengali_stopwords.txt', encoding="utf8") as b_stopword_file:
    b_stopwords = {line.strip() for line in b_stopword_file}

### Parse chat and create Map

In [80]:
# Parse the exported chat log into per-sender structures.
#   chatmap   : sender -> list of [timestamp string, message text]
#   pingermap : sender -> number of messages sent at least `time_diff` days
#               after the previous message in the log
#   firsttext : sender -> the texts of those messages
chatmap = {}
pingermap = defaultdict(int)
time_diff = 1 #days
prev_date = None
firsttext = defaultdict(list)
# `with` guarantees the log file is closed even if parsing raises.
with open('_chat.txt', encoding="utf8") as f:
    for line in f:
        # Drop the invisible Unicode direction marks (U+200E, U+202A) before
        # tokenising on whitespace.
        nline = line.strip().replace('\u200e','').replace('\u202a','').split()
        # The first three tokens are expected to form "[dd/mm/yy, hh:mm:ss AM]".
        datestring = ' '.join(nline[0:3])
        try:
            date = datetime.strptime(datestring, '[%d/%m/%y, %I:%M:%S %p]')
        except ValueError:
            # A line without a valid timestamp cannot be attributed to a sender
            # or placed in time. Report it and skip it, rather than recording
            # it under a bogus key with the previous line's (or an undefined)
            # date. NOTE(review): such lines are presumably continuations of a
            # multi-line message -- confirm and merge them if that is wanted.
            print ('Invalid date in ', nline)
            continue

        # Split on the FIRST colon only, so colons inside the message itself
        # (times, URLs, ...) are preserved instead of being turned into spaces.
        sender, _sep, text = ' '.join(nline[3:]).partition(':')
        key = sender.strip()
        clist = [datestring, text]

        # The very first message has no predecessor, so it never counts as a
        # ping; every later message is compared with the one just before it.
        if prev_date is not None:
            diff = date - prev_date
            if diff.total_seconds() >= 3600*24*time_diff:
                pingermap[key] += 1
                firsttext[key].append(text)
        prev_date = date

        chatmap.setdefault(key, []).append(clist)

### Create name-wise word-frequency map

In [94]:
# Build word-frequency tables from the parsed chat.
#   countmap : sender -> list of (word, count) for that sender's `n` most
#              frequent words
#   gc       : word counts accumulated over all senders
countmap = {}
# Translation table that deletes every ASCII punctuation character.
table = str.maketrans('','', string.punctuation)
n = 100
gc = Counter()
for name in chatmap:
    c = Counter()
    for x in chatmap[name]:
        # x is [timestamp string, message text]; only the text is tokenised.
        for w in x[1].split():
            w = w.lower().translate(table).strip()
            # Tokens made only of punctuation collapse to '' after stripping;
            # they are not words and must not be counted.
            if not w:
                continue
            if w in stopwords or w in b_stopwords:
                continue
            c[w] += 1
    # Fold this sender's counts into the global tally in one step.
    gc.update(c)
    countmap[name] = c.most_common(n)

### Display most frequent words per person

In [95]:
# For every sender, render the name as a bold level-2 heading and print the
# stored (word, count) pairs on one line as tab-terminated "word(count)" items.
for name, top_words in countmap.items():
    display(Markdown("## **{}**".format(name)))
    print("".join("{}({})\t".format(word, count) for word, count in top_words))
    # Blank line between senders.
    print()
    

## **Arpan Dutta**

chobbi(9)	raat(7)	binku(6)	halka(6)	mojo(6)	tomar(5)	chapless(5)	gaan(5)	pwc(5)	theek(5)	bangladesh(5)	baaltai(4)	chol(4)	pune(4)	gechis(4)	behala(4)	didi(4)	noye(4)	meet(4)	samne(4)	baaje(4)	yr(4)	chalate(4)	boshe(4)	pp(4)	baniyeche(4)	add(3)	levelr(3)	biye(3)	pinik(3)	korchillo(3)	byatha(3)	gelo(3)	mod(3)	pagla(3)	start(3)	miss(3)	mumbai(3)	tomae(3)	ü•É(3)	darjeeling(3)	üëçüèºüëçüèºüëçüèº(3)	porashona(3)	agey(3)	chillam(3)	bolbe(3)	khushi(3)	naachte(3)	üò≥(3)	sir(3)	dhur(3)	lyora(3)	berate(3)	salary(3)	project(3)	cazz(3)	kano(3)	saranik(3)	dicche(3)	üëåüèºüëåüèºüëåüèº(3)	tv(3)	series(3)	dp(2)	sotti(2)	mondarmoni(2)	grp(2)	plane(2)	25(2)	us(2)	parbo(2)	nope(2)	khai(2)	jaak(2)	goh(2)	bokachoda(2)	dara(2)	khelam(2)	dorkar(2)	jokhon(2)	arnd(2)	mins(2)	dada(2)	ma(2)	üë®‚Äçüë©‚Äçüëß‚Äçüë¶(2)	gelam(2)	pant(2)	cholche(2)	flat(2)	bcoz(2)	groups(2)	sobh(2)	bitcoin(2)	already(2)	jibon(2)	porshu(2)	cig(2)	ghonta(2)	25k(2)	shorir(2)	neye(2)	



## **Prasid**

company(16)	bepok(14)	facebook(13)	visa(13)	football(12)	actually(12)	fat(11)	youre(11)	code(11)	london(11)	band(10)	lagche(10)	lagbe(10)	san(10)	gelo(9)	gan(9)	phone(9)	weight(9)	nice(9)	gym(9)	goal(9)	class(9)	diego(9)	nam(8)	opor(8)	comment(8)	post(8)	tuio(8)	audio(8)	science(8)	research(8)	kothao(8)	exactly(8)	majhe(8)	english(8)	test(8)	boshe(8)	british(8)	leora(7)	called(7)	baler(7)	berie(7)	notun(7)	times(7)	mod(7)	pwc(7)	read(7)	rank(7)	easily(7)	companies(7)	friends(7)	mal(7)	duto(7)	bas(7)	guess(7)	final(7)	country(7)	muscle(7)	karur(6)	ish(6)	gechilam(6)	music(6)	hahahahaha(6)	youtube(6)	ure(6)	hifi(6)	baba(6)	easy(6)	graph(6)	literally(6)	coming(6)	money(6)	cup(6)	got(6)	bhab(6)	bepar(6)	khele(6)	since(6)	talking(6)	network(6)	basically(6)	add(6)	using(6)	ashol(6)	students(6)	score(6)	south(6)	software(6)	development(6)	gay(6)	prem(6)	place(6)	point(6)	already(6)	bijoy(6)	chat(6)	recently(6)	pari(5)	kalo(5)	puchu(5)	



## **Nirmal Roy**

europe(12)	real(11)	sex(10)	spanish(10)	mod(9)	city(9)	naam(9)	dutch(9)	match(9)	tu(9)	final(9)	british(9)	london(9)	follow(8)	nice(8)	everything(8)	network(8)	cup(8)	moment(8)	bichi(8)	kata(7)	kolkata(7)	bijoy(7)	pwc(7)	biye(7)	ill(7)	really(7)	random(7)	learning(7)	phd(7)	love(7)	friends(7)	us(7)	netherlands(7)	never(7)	difference(7)	dutta(6)	4(6)	gelo(6)	korbi(6)	december(6)	football(6)	post(6)	language(6)	beer(6)	deep(6)	course(6)	dhur(6)	england(6)	maya(6)	believe(6)	coach(6)	chat(6)	start(5)	sheyi(5)	dibyendu(5)	korchish(5)	duto(5)	choto(5)	lakh(5)	talk(5)	amsterdam(5)	american(5)	bhule(5)	iche(5)	facebook(5)	khela(5)	sports(5)	goal(5)	talking(5)	paper(5)	project(5)	stuff(5)	got(5)	eres(5)	doesnt(5)	phone(5)	italy(5)	ticket(5)	weather(5)	visa(5)	rich(5)	30(5)	nekane(5)	outside(5)	argentina(5)	jodi(4)	month(4)	sudeshna(4)	nh7(4)	mangsho(4)	almost(4)	audio(4)	atleast(4)	boss(4)	jash(4)	car(4)	ghure(4)	ma(4)	chudir(4)	



## **Sourav Sanyal**

bnara(16)	chakri(14)	oshadharon(13)	bhaloi(13)	darun(13)	biye(11)	egulo(10)	classic(10)	uchit(10)	jata(10)	bolchis(10)	google(10)	hobena(10)	utah(10)	ay(10)	blacks(10)	sotti(9)	jara(9)	jigges(9)	party(9)	sorry(9)	december(9)	bhabchi(8)	suru(8)	aste(8)	kal(8)	galo(8)	band(8)	lagche(8)	dekhechi(8)	europe(8)	chilam(8)	phd(8)	research(8)	ee(8)	kalo(7)	etai(7)	jachche(7)	gechilo(7)	sobcheye(7)	chara(7)	baba(7)	jokhon(7)	gym(7)	ichche(7)	paper(7)	korto(7)	pati(7)	ticket(7)	college(7)	wow(7)	tool(7)	size(7)	gandu(7)	problem(7)	onekdin(6)	dhuke(6)	exactly(6)	us(6)	porte(6)	shrobona(6)	korbi(6)	atleast(6)	bachcha(6)	d(6)	jibone(6)	school(6)	sunlam(6)	funny(6)	chorom(6)	ma(6)	matter(6)	music(6)	chaile(6)	pari(6)	easy(6)	south(6)	hoyni(6)	california(6)	tara(6)	nahole(6)	mangsho(6)	trip(6)	muscle(6)	you‚Äôre(6)	miles(6)	fat(6)	i‚Äôm(6)	kaka(6)	educated(6)	dp(5)	berie(5)	nana(5)	ero(5)	married(5)	saha(5)	learning(5)	bijoy(5)	jaygay(5)	choto(5)	



## **Deeptish**

december(7)	science(7)	churanto(7)	never(6)	around(6)	kolkata(6)	long(6)	might(5)	got(5)	day(5)	matter(5)	end(5)	ive(5)	hard(5)	course(5)	probably(5)	race(5)	stupid(5)	happy(4)	change(4)	phone(4)	laage(4)	kamon(4)	feel(4)	point(4)	important(4)	trying(4)	things(4)	goal(4)	man(4)	match(4)	hoytoh(4)	weekend(4)	paglachoda(4)	heavy(4)	üë¥üèª(4)	power(4)	hear(4)	look(4)	lower(4)	ak(4)	odbhut(4)	doesnt(4)	days(4)	education(4)	white(4)	blacks(4)	beautiful(3)	byaparta(3)	game(3)	obdhi(3)	least(3)	chakri(3)	times(3)	average(3)	talking(3)	month(3)	football(3)	kina(3)	sexy(3)	really(3)	going(3)	experience(3)	mba(3)	goals(3)	money(3)	party(3)	dada(3)	solid(3)	idea(3)	ex(3)	enough(3)	culture(3)	business(3)	worked(3)	hours(3)	exactly(3)	tell(3)	problem(3)	thakle(3)	set(3)	test(3)	3te(3)	analytics(3)	tech(3)	second(3)	influence(3)	tableau(3)	kano(3)	swagata(3)	deye(3)	bootstrap(3)	easy(3)	depends(3)	onyo(3)	dekhish(3)	thought(3)	noy(3)	wasnt(3)	facts(3)	



## **Raj**

dutch(8)	‡¶®‡¶§‡ßÅ‡¶®(6)	pawa(6)	euros(6)	ekshathe(5)	prem(5)	jaye(5)	august(5)	riju(5)	achi(4)	flight(4)	business(4)	yo(4)	ghure(4)	bhabchi(4)	subir(4)	uk(4)	language(4)	spanish(4)	beriye(4)	weather(4)	marattok(4)	emirates(4)	kal(4)	bag(4)	di(3)	4(3)	tomar(3)	hoeche(3)	jacchi(3)	ie(3)	masters(3)	kolkata(3)	shotti(3)	bollo(3)	7th(3)	kina(3)	nathu(3)	support(3)	918017141461(3)	shokale(3)	food(3)	‡¶ó‡¶æ‡¶Å‡¶°‡¶º(3)	‡¶Ü‡¶™‡¶®‡¶æ‡¶∞(3)	visa(3)	permit(3)	student(3)	boddo(3)	europe(3)	bien(3)	european(3)	nam(3)	philips(3)	kolkataye(3)	thakbo(3)	ekgada(3)	desh(3)	december(3)	city(3)	tickets(3)	tomato(3)	uchit(3)	jak(3)	jolche(3)	tax(3)	baire(3)	dec(3)	üòú(3)	spices(3)	aschi(2)	bhabe(2)	dieche(2)	score(2)	gymnastics(2)	dekhacche(2)	bhetore(2)	arpita(2)	barite(2)	weekender(2)	sem(2)	ayanangshu(2)	kortam(2)	startup(2)	2018(2)	birthday(2)	got(2)	probably(2)	school(2)	international(2)	management(2)	porshu(2)	whisky(2)	summer(2)	üòéüòéüòéüòé(2)	plan(2)	dara(2)	korechilo(2)	real(2)	fan(2)	barce

### Show Ping Stats

In [93]:
# Two-column table: sender name right-aligned to 15 characters, then the
# number of pings recorded for that sender in `pingermap`.
ping_row = '{:>15}\t\t{}'
ping_header = 'No. of pings (time diff = {} days)'.format(time_diff)
print(ping_row.format('Name', ping_header))
for person, pings in pingermap.items():
    print(ping_row.format(person, pings))

           Name		No. of pings (time diff = 1 days)
  Sourav Sanyal		20
    Arpan Dutta		9
     Nirmal Roy		16
         Prasid		30
            Raj		3
       Deeptish		4


### Show activity

In [88]:
# Two-column table: sender name right-aligned to 15 characters, then how many
# messages `chatmap` holds for that sender.
activity_row = '{:>15}\t\t{}'
print(activity_row.format('Name', 'Number of messages'))
for person, messages in chatmap.items():
    print(activity_row.format(person, len(messages)))

           Name		Number of messages
    Arpan Dutta		911
         Prasid		3933
     Nirmal Roy		2625
  Sourav Sanyal		4035
       Deeptish		1302
            Raj		644
