HOMOPHONE

Each of two or more words having the same pronunciation but different meanings, origins, or spelling, for example new and knew.
Each of a set of symbols denoting the same sound or group of sounds.
https://en.wikipedia.org/wiki/Homophone

Fine, maybe "Reporter" and "Harry Porter" are not considered homophones, but try pronouncing them while wearing a medical mask.

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS9331l0arS-uL0J-fZ3qHVX06o_hJDZsauew&usqp=CAU)

en.wikipedia.org

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import math
from textwrap import wrap
warnings.filterwarnings('ignore')
sns.set_palette('Set2')
sns.set_style('darkgrid')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the script CSV (decoded as cp1252) and preview its first ten rows
SCRIPT_CSV = '/kaggle/input/harry-potter-and-the-philosophers-stone-script/hp_script.csv'
df = pd.read_csv(SCRIPT_CSV, encoding='cp1252')
df.head(10)

In [None]:
#Code by Thiago Panini https://www.kaggle.com/thiagopanini/pycomp-predicting-survival-on-titanic-disaster/notebook

!pip install pycomp

In [None]:
# Code by Thiago Panini https://www.kaggle.com/thiagopanini/pycomp-predicting-survival-on-titanic-disaster/notebook
# Draws a donut chart over the `character_name` column of df.

# Wildcard import; plot_donut_chart used below presumably comes from here
from pycomp.viz.insights import *

# Label names and colours handed to plot_donut_chart.
# NOTE(review): character_map is a set literal, not a dict, despite its name —
# confirm which type pycomp's `label_names` argument expects.
character_map = {'Hermione Granger', 'Albus Dumbledore'}
character_colors = ['crimson', 'darkslateblue', 'darkgreen', 'BlueViolet', 'chartreuse', 'cadetblue', 'aquamarine']
plot_donut_chart(df=df, col='character_name', label_names=character_map, colors=character_colors,
                 title='Harry Potter Characters')

In [None]:

#Code by Mohammad Imran Shaikh https://www.kaggle.com/shikhnu/covid19-tweets-eda-visualization-wordcloud

# Count the distinct values held by every column of df
unique = [df[column].nunique() for column in df.columns]
unique_df = pd.DataFrame({'Features': df.columns, 'Uniques': unique})

f, ax = plt.subplots(1, 1, figsize=(15, 7))

splot = sns.barplot(x=unique_df['Features'], y=unique_df['Uniques'], alpha=0.8)
# Write each bar's height just above the middle of its top edge
for bar in splot.patches:
    top_centre = (bar.get_x() + bar.get_width() / 2., bar.get_height())
    splot.annotate(f'{bar.get_height():.0f}', top_centre, ha='center', va='center',
                   xytext=(0, 9), textcoords='offset points')

plt.title('Bar plot for number of unique values in each column', weight='bold', size=15)
plt.xlabel('Features', size=12, weight='bold')
plt.ylabel('#Unique values', size=12, weight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Word cloud built from the values of the `character_name` column
from wordcloud import WordCloud, ImageColorGenerator

# Join every value (cast to str, so missing values become 'nan') into one text
text = " ".join(str(each) for each in df.character_name)

# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200, colormap='Set1', background_color="purple").generate(text)

# Create exactly one figure: every extra plt.figure() call opens another
# figure, which shows up as a blank plot in the cell output.
plt.figure(figsize=(15, 10))

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Keep only the columns in which fewer than 50% of the values are missing
missing_pct = df.isna().sum() / df.shape[0] * 100
kept_columns = [name for name, keep in (missing_pct < 50).items() if keep]
df_modified = df[kept_columns]
print(df_modified.shape)
df_modified.head()

In [None]:
# Side-by-side pie charts of the ten most frequent values in two columns
plt.rcParams['font.size'] = 14
fig, axes = plt.subplots(1, 2, figsize=(20, 20))
for column, axis in zip(['character_name', 'dialogue'], axes.flat):
    top_counts = df_modified[column].value_counts().head(10).to_dict()
    # Placeholder category is not plotted
    top_counts.pop('Not Available', None)

    labels = []
    for value in top_counts:
        words = value.split(' ')
        # Labels longer than six words get a line break after their middle word
        if len(words) > 6:
            words[math.ceil(len(words) / 2) - 1] += '\n'
        labels.append(' '.join(words))

    axis.pie(x=list(top_counts.values()), labels=labels, shadow=True, startangle=0)
    # Title is the column name with underscores replaced by spaces, upper-cased
    axis.set_title(column.replace('_', ' ').upper(), weight='bold', fontsize=18)
plt.tight_layout()
plt.show()

In [None]:
#Code by Savita Nair https://www.kaggle.com/savitanair/hr-analytics

# Report the number of distinct character names and the ten with the most rows
separator = '*'*20
print(f'Dataset has {len(df.character_name.unique())} unique groups')
print(separator)
print('And the top 10 counts are :')
top_characters = df.character_name.value_counts().head(10)
print(top_characters)
print(separator)

# Bar chart of those ten counts
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
ax.bar(top_characters.index, top_characters.values, width=0.8, color='y')
plt.xticks(rotation=45)

#Code below (TF-IDF / cosine-similarity recommender) by Dexter https://www.kaggle.com/soul9862/the-movies-recommend-analysis-cosine-similarity

In [None]:
# Vectorise every dialogue line with TF-IDF, ignoring English stop words.
# Missing dialogue is replaced by an empty string because TfidfVectorizer
# rejects NaN documents; filling (instead of dropping) keeps one matrix row
# per df row, so row numbers stay aligned with df.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['dialogue'].fillna(''))
print(tfidf_matrix.shape)

In [None]:
# Pairwise dot products between all TF-IDF rows. With a single argument
# linear_kernel compares the matrix against itself; TfidfVectorizer
# L2-normalises rows by default, so these dot products are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix)

In [None]:
# Reverse lookup: dialogue text -> df index value of the row holding that line
# (presumably also the row position, given read_csv's default index — confirm).
# Series.drop_duplicates() compares the *values* (the index values, already
# unique), so it would leave repeated dialogue lines in place. Duplicates are
# therefore removed on the index labels, keeping the first occurrence, so that
# every lookup returns a single row number.
indices = pd.Series(df.index, index=df['dialogue'])
indices = indices[~indices.index.duplicated(keep='first')]
print(indices)

In [None]:
# Look up the row number stored for one known line of dialogue
sample_line = 'Good evening, Professor Dumbledore. Are the rumours true Albus?'
idx = indices[sample_line]
print(idx)

In [None]:
def get_recommendations(dialogue, cosine_sim=cosine_sim, top_n=10):
    """Return the dialogue lines most similar to the given line.

    Args:
        dialogue: Exact text of a line present in the ``indices`` lookup.
        cosine_sim: Square similarity matrix whose row/column ``i`` refers to
            row ``i`` of ``df``. Defaults to the module-level matrix as it
            exists when this function is defined.
        top_n: Number of similar lines to return.

    Returns:
        A Series of up to ``top_n`` entries of ``df['dialogue']``, ordered
        from most to least similar. The queried row itself is never included.

    Raises:
        KeyError: If ``dialogue`` is not a label of ``indices``.
    """
    idx = indices[dialogue]
    # A line that occurs several times yields a Series of row numbers rather
    # than a scalar; use the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the queried row explicitly instead of assuming it sorts first: ties
    # (identical lines) or an all-stop-word line (zero self-similarity) can put
    # another row at the top.
    candidates = [
        (row, score) for row, score in enumerate(cosine_sim[idx]) if row != idx
    ]
    # sorted() is stable, so equally similar rows keep their original order
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    scene_indices = [row for row, _ in candidates[:top_n]]

    return df['dialogue'].iloc[scene_indices]

In [None]:
# Show the dialogue lines most similar to this sample line (displayed as the cell output)
get_recommendations('Hagrid is bringing him.')

![](https://pics.me.me/hagrids-lines-in-the-first-harry-potter-movie-vital-plot-64781360.png)

https://me.me/i/hagrids-lines-in-the-first-harry-potter-movie-vital-plot-85e0d653c19a4c41b79b2ef0db7170de