In [32]:
import os
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav
from sklearn.mixture import GaussianMixture
from python_speech_features import mfcc

def extract_features(audio_path, trim_length=None):
	"""Read a WAV file and return its MFCC feature matrix.

	Args:
		audio_path: Path to a WAV file readable by ``scipy.io.wavfile.read``.
		trim_length: Optional number of seconds (int or float) to keep from
			the start of the recording. Falsy values (``None``, ``0``) keep
			the whole signal.

	Returns:
		The array returned by ``python_speech_features.mfcc`` for the signal.
	"""
	sr, y = wav.read(audio_path)
	# Multi-channel files are returned as (samples, channels), while mfcc
	# expects a 1-D signal, so average the channels down to mono first.
	if y.ndim > 1:
		y = y.mean(axis=1)
	if trim_length:
		# Seconds -> samples; int() keeps the slice bound valid when
		# trim_length is a float such as 2.5.
		y = y[:int(trim_length * sr)]
	# NOTE(review): numcep=100 is larger than mfcc's default filter count, so
	# fewer than 100 coefficients are presumably returned — confirm.
	mfccs = mfcc(y, sr, numcep=100)
	return mfccs

def train_gmm(features, n_components=3, random_state=None):
	"""Fit a Gaussian mixture model to a feature matrix.

	Args:
		features: Training data passed unchanged to ``GaussianMixture.fit``.
		n_components: Number of mixture components.
		random_state: Optional seed forwarded to ``GaussianMixture``. The
			default ``None`` keeps the previous unseeded behaviour; pass an
			int to make the fitted model reproducible across runs.

	Returns:
		The fitted ``GaussianMixture`` instance.
	"""
	gmm = GaussianMixture(n_components=n_components, random_state=random_state)
	gmm.fit(features)
	return gmm

def load_and_process_audio(file_path):
	"""Return the features of the audio file at ``file_path``.

	Delegates to ``extract_features`` with its default arguments.
	"""
	return extract_features(file_path)

def main():
	"""Train one GMM per reference recording and flag mismatched clips.

	Steps:
		1. Read the client profile CSV and keep the rows whose
		   ``<rec_id>.wav`` file exists in the training directory.
		2. Fit one Gaussian mixture model per kept recording.
		3. For every row of ``filtered_matched_results.csv``, score the clip's
		   features against every model and take the highest-scoring one.
		4. Write ``rec_id`` / ``is_impersonator`` to the output CSV, where
		   ``is_impersonator`` is 0 when the best-matching model's name equals
		   the row's name and 1 otherwise.

	Raises:
		RuntimeError: If no model produces a usable score for a clip (for
			example because no training recordings were found).
	"""
	# Directory containing the training audio files
	train_dir = "../../real_audio/"
	# Load client profiles
	client_profiles = pd.read_csv("../../../client_profiles/real_match_name.csv")
	# Map each rec_id to (file path, client name), skipping profiles whose
	# audio file is missing. The path is built once per row.
	audio_files = {}
	for _, profile in client_profiles.iterrows():
		wav_path = os.path.join(train_dir, profile['rec_id'] + '.wav')
		if os.path.isfile(wav_path):
			audio_files[profile['rec_id']] = (wav_path, profile['name'])

	# One model per reference recording, keyed by rec_id.
	gmms = {}
	for train_id, (wav_path, _name) in audio_files.items():
		gmms[train_id] = train_gmm(load_and_process_audio(wav_path))

	# Load matched results to compare
	matched_results = pd.read_csv("filtered_matched_results.csv")

	# Compare results
	results = []
	clips_dir = "../../../audio_clips"
	for _, row in matched_results.iterrows():
		rec_id = row['rec_id']
		actual_name = row['name']
		features = load_and_process_audio(os.path.join(clips_dir, rec_id + '.wav'))
		# Pick the model with the highest score. The strict '>' keeps the
		# first model on ties. The candidate gets its own name so the clip's
		# rec_id is not overwritten by the loop variable.
		best_match = None
		best_score = -np.inf
		for candidate_id, gmm in gmms.items():
			score = gmm.score(features)
			if score > best_score:
				best_score = score
				best_match = candidate_id
		if best_match is None:
			# Happens when there are no models or every score is NaN; fail
			# with a clear message instead of an opaque KeyError(None).
			raise RuntimeError(f"No speaker model could score recording {rec_id}")
		print(best_score)
		best_match_name = audio_files[best_match][1]
		match_result = actual_name == best_match_name
		results.append((rec_id, actual_name, best_match_name, match_result))

	# Collect plain rows and build the frame once, rather than appending to
	# a DataFrame row by row through a private pandas method.
	output_rows = []
	for rec_id, actual_name, best_match_name, match_result in results:
		print(f"rec_id: {rec_id}, actual_name: {actual_name}, best_match_name: {best_match_name}, match_result: {match_result}")
		output_rows.append({'rec_id': rec_id, 'is_impersonator': 0 if match_result else 1})
	df_results = pd.DataFrame(output_rows, columns=['rec_id', 'is_impersonator'])
	# NOTE(review): this output path is relative to the working directory,
	# like the input paths above, yet it starts at 'src/' — confirm which
	# directory the script is meant to be run from.
	df_results.to_csv('src/impersonator/impersonator.csv', index=False)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
	main()





-76.60868006967293
-94.17680896268458
-92.11631950833596
-76.84557114970181




-92.37052712377714
-93.7435744485612
-74.5080388904029




-96.12396602916395
-97.58004989663898
-90.7260028118405




-97.47884025633356




-75.0940895797581
-88.68946330182278




-69.72851039756225
-97.47492869415544




-78.35893814272445
-72.88192420692748
-93.55544423906704




-92.72885652846594
-90.47447535720573




-92.1844355021016
-78.53250895984566
-91.53385073127492




-79.03269634841955
-91.02746023928562




-74.73948410428855
-93.30435371540503
-75.98492992388546




-87.12812466969677
-76.91257566339362




-75.87678583392776
-75.08268428175917
-91.52652973864106




-93.31367843662038
-92.05520118867348
-91.63949537926071




-103.88247857132198
-78.49910434836173




-93.05433052487965
-97.0320479505846
-71.74630394687645




-91.31910886800007
-77.2593289749819
-92.1755596014993




-103.67752448885328




-76.909169532846
-89.61623606671667




-89.0820011635636
-94.84053450320961
-90.24058301955688




-82.58330611779533
-97.29640395099469




-72.53475692679342
-93.01055149086552
-93.86549088651425




-91.12082748690035
-80.2955382924744




-78.88781209172352
-97.74331390281893
-77.71393876354233




-88.33297557896111
-75.44899798889423




-78.33145869101976
-73.87492516756514
-76.13641473995992




-92.42264448242553
-95.7110915902648
-99.14549821127758
-73.38522455947277




-91.61875339092165
-82.24577098410577
-75.61491030563408




-74.44139870394702
-93.47138304063031




-75.59801193672358
-90.40463593497678
-90.80761609034944




-90.4981267263744
-88.59757901178865




-71.8915846501556
-76.75332745877472




-76.82731022228063
-81.94703453064086
-75.75593798332072




-95.19050722990883
-74.9909545877773




-101.20731969895535




-76.992513847766
-92.34586865094849




-82.06752252276237
-79.26085512158265
-97.3084793119834




-77.48979641815316
-74.59161524211983




-73.5489415123387
-87.22348761265596
-93.66325184180326




-78.65407168287695
-76.94426024350827
-94.08582099449586




-77.10616239006048
-76.57802607296419




-78.67603287975598
-94.11518129940116
-80.12695366315482




-92.59033369206003
-77.66720533362434




-96.08199221802576
-88.2211472446988
-97.4022525284247




-96.25061455601575
-78.82564202997455




-92.97047525784004
-89.65706413470909




-92.56082745476527
-76.65016055759001




-78.13042965786332
-80.07193342496487
-78.74069975829849




-92.54844352104163
-80.96120483422958
-88.47116099947992




-78.01928448921618
-81.22617234773291




-76.07262328631704
-91.71351183489583




-76.09327566347159
-80.18719160844954
-92.95442543401298




-96.53025718183838
-79.93548536858914




-78.47792701963942
-94.10472663034987
-95.63783471334226




-74.74646519114467




-71.86105074419156
-75.96902492244665




-91.59394414608686
-91.4957417989457
-98.9118977906762




-73.60928051105417
-78.42213936224417




-94.11898396190884
-75.66790075923247




-88.44175516953769
-92.7429753755759




-72.73553788532438
-75.1101248514969




-78.60248627378175
-78.72964910874924




-77.54052355384988
-90.87421554179892
-92.52667052155445




-78.80611998217996
-97.05245173287156
-74.83702707007684
-87.20109936415366




-74.53028797997422
-91.04100299654343
-90.9555578286974




-75.32133572057798
-81.949905203604
-93.55821130749328
-70.41243813543953




-76.77780918846612
-92.69751909952896




-78.35196590636886
-76.80118529277506
-93.37057336745158




-92.57966804014679
-92.18736817789794




-79.52662788666127
-79.07068647410239
-93.09081846087149




-78.01165282629775
-96.57057195886847
-75.83703880909775




-78.18068756501863
-81.90754176490181
-94.05666872174666




-97.69292252713002
-101.01828719430752
-95.50026578488021




-86.6865024563621
-76.28921614247714
-98.74760310454606




-98.45845362645883
-75.44759993776259




-94.75929517928918
-91.64529533126678




-75.30133058566561
-82.75090706759599
-91.40863989504047




-77.86849283047951




-78.68454747370053
-74.56282180114252
-76.53779460356066




-86.50652515296042
-74.98089641932086
-82.76437133829924




-86.38546812677785
-92.49063890178455
-92.41584474742751




-75.09680102258864
-83.94540302542298
-78.0760033190834




-93.35165541763224
-82.85347003592067
-73.98300017503325




-92.84155506552554
-75.29186912483478




-91.97994985910903
-94.34570684250788




-79.48301174724728
-78.18676116455102




-79.64477920780041
-92.45798603620726
-75.05447923831265




-93.54768831938367




-89.94913401278177
-76.62452762679135
-98.27102938579132




-90.98978248564377
-88.30338474082075
-97.01653910100944




-76.22791251348661
-80.27022283947316




-91.08437336339554
-72.53061495783628




-80.60079358777804
-79.03971539376788




-94.51355876239747
-73.66442427876672




-75.89456322251569
-92.02429051359721
-96.84603036758296




-91.57289724319263
-80.91241498920708




-74.41085139743036
-98.51379515957433
-76.58382969546804




-80.70215701804146
-73.62267213517555




-72.07080352657867
-75.78187751820614




-93.22249590128638
-77.01378809596926




-73.15596988721032




-79.09224980617178
-76.74053790769125




-102.23681020393566
-75.73687994667225




-73.879233037185
-92.55954001848366
-75.22656101481562




-90.19040694733616
-79.43209304809132




-94.71476694568605
-98.27308995369876




-73.94724637010184
-77.5088843876054




-88.67859540390009
-88.69357405221449




-92.9769860095851
-76.78795927128208




-74.72561452849743
-73.87459350638598




-95.63933853328966
-76.45938445764949




-76.52040721968031
-77.09094281762327




-77.78870190783114
-95.24609043449958
-95.9512721182824




-92.04589301877643
-71.08057059986079




-72.52316816425282
-74.07551919798833




-76.45581174397302




-77.37038356220435
-99.15806799009573
-78.49438653693016




-79.26887774030214
-91.9694696604303




-76.25243019301207
-77.87642873144083




-102.87577309888397
-96.71805306944545




-91.42348293979146
-95.72580245975527




-91.93984202815501
-89.25538622989279




-93.6152180673768
-79.77551784037169




-72.51066337890283
-94.992186133719




-91.69724422071006
-80.72137717884235




-79.60900427710312
-82.24017002004771




-76.71953495176987
-101.71660389371816
-97.13319845945952




-80.74657101537606
-76.71538138429014
-93.93559118447455




-72.21626060178977
-94.72584853800001
-75.44465465809287




-83.1367704227162
-93.41446418388628
-79.94599427690687




-77.10051481157949
-90.21008938165447




-69.42835868145856
-91.24811599599943




-102.30996023891674
-92.2520396503638




-100.11957325267346
-82.91630823258969
-82.41366483166475




-75.82565832855974
-95.67283422701766
-91.53753470041913




-90.9130097003212
-75.91055178871837
-88.8066842605628




-82.90139454253874
-76.57419378544627
-77.46776628450407




-91.27432786208038
-92.53343928282237
-78.62224888494802




-89.38190148656747
-90.58007862596429




-71.72217952889196
-72.76985393052351




-93.23630380447719
-77.32640311237375




-93.14106296134217
-83.07122595695193
-94.68083773224981




-72.94413164942988
-91.8820211230442




-74.96735744899284
-93.28161672579667
-73.4161881832566




-93.82644768869727
-81.2438711852118
-91.56508036151669




-76.10577173083881
-75.22015432349234
-91.38971520607532




-101.0783430817825
-89.80411973079264
-93.90159841959199




-78.11636805830179
-79.29741983369419




-91.4431777534703
-92.66336240798066
-77.68220516697201




-76.88940040273394




-79.90492710603843
-75.92995454087306




-77.81269721783023
-76.40407368213813




-92.10974881963753
-92.20683641754638
-78.71163101207948
-91.63978480775955




-90.76078330358746
-79.66156128951769




-83.27672532760313
-73.63751793277426




-74.00348506739418
-98.89181128870607




-90.48278945329383
-76.74110390434973




-93.24017813746079
-76.23930378800837
-91.67907648130947




-91.4427995486505
-93.4585265318777
-77.00059994657326




-76.66621127780948
-77.98936452255502




-75.63585004817081
-80.25268766454971
-78.27733014396196




-76.69376454040096
-93.0437509873332
-98.65752911566221
-79.73324749320325




-76.92571746578162
-75.05500324002124




-74.17367312991138
-92.8324067193883
-94.15921523010363




-73.4867323294706
rec_id: 43QRG7SY14, actual_name: Elias Svensson, best_match_name: Fatima Zahra Mansouri, match_result: False
rec_id: YXAXYF3SXW, actual_name: Finn Murphy, best_match_name: Charlotte Leclerc, match_result: False
rec_id: ZVS4M3Y201, actual_name: Mia Anderson, best_match_name: Mia Anderson, match_result: True
rec_id: NZBALVR6CF, actual_name: Mia Anderson, best_match_name: Olivia Tremblay, match_result: False
rec_id: GHSWJ7IZ96, actual_name: Amina Kouri, best_match_name: Lara van den Berg, match_result: False
rec_id: N2VPD13IWV, actual_name: Olivia Tremblay, best_match_name: Charlotte Leclerc, match_result: False
rec_id: 656QZ2VWR4, actual_name: Jorge Castillo, best_match_name: Jorge Castillo, match_result: True
rec_id: UMM0YIUKNH, actual_name: Noah Zimmerman, best_match_name: Fatima Zahra Mansouri, match_result: False
rec_id: Z1BCC9U0E5, actual_name: Sofia Almeida, best_match_name: Lara van den Berg, match_result: False
rec_id: XDAOHC9OY5, actual_name: Mia Anderson, best