# Harry Potter Reviews Analysis ⚡

## Importing Libraries

In [1]:
import pandas as pd
import plotly.express as px
from wordcloud import WordCloud

> ## Dataset Information

In [2]:
# Location of the reviews CSV inside the Kaggle input directory.
REVIEWS_CSV_PATH = '/kaggle/input/harry-potter-reviews/harry_potter_reviews.csv'

# Load the raw reviews into the frame used by every later cell.
df = pd.read_csv(REVIEWS_CSV_PATH)

In [3]:
# Preview five randomly chosen reviews. A fixed random_state makes the preview
# reproducible, so re-running the notebook on a fresh kernel shows the same rows.
df.sample(5, random_state=42)

Unnamed: 0,user_id,user_sex,user_age,user_country,rating,comment,favourite_character,date
379,379,male,23,Germany,3.0,"""The film's tone fails to capture the essence ...",Ron Weasley,2002-06-05
83,83,female,56,Belgium,3.0,"""The character interactions felt forced, and t...",Ron Weasley,2002-06-25
397,397,female,37,Finland,4.5,"""The well-paced storytelling kept me immersed ...",Ron Weasley,2004-08-07
52,52,female,44,Germany,5.0,"""Neville Longbottom's courage is truly commend...",Neville Longbottom,2004-04-27
417,417,female,27,Croatia,5.0,"""Neville Longbottom's journey is a highlight.""",Neville Longbottom,2002-08-09


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,user_id,user_age,rating
count,491.0,491.0,491.0
mean,245.0,35.274949,3.979633
std,141.883755,11.231868,0.945771
min,0.0,8.0,1.5
25%,122.5,28.0,3.5
50%,245.0,35.0,4.5
75%,367.5,43.0,4.5
max,490.0,64.0,5.0


In [5]:
# Size of the loaded frame as a (rows, columns) tuple.
df.shape

(491, 8)

In [6]:
# Per-column non-null counts and dtypes, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 491 entries, 0 to 490
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   user_id              491 non-null    int64  
 1   user_sex             491 non-null    object 
 2   user_age             491 non-null    int64  
 3   user_country         491 non-null    object 
 4   rating               491 non-null    float64
 5   comment              491 non-null    object 
 6   favourite_character  491 non-null    object 
 7   date                 491 non-null    object 
dtypes: float64(1), int64(2), object(5)
memory usage: 30.8+ KB


> ##  Visualisation 

In [7]:
# Histogram of the `rating` column.
fig_rating = px.histogram(
    df,
    x='rating',
    nbins=10,
    opacity=0.7,
    title='Distribution of Ratings',
)
fig_rating.show()

In [8]:
# Histogram of the `user_age` column.
fig_age = px.histogram(
    df,
    x='user_age',
    nbins=10,
    opacity=0.7,
    title='Distribution of User Ages',
)
fig_age.show()

In [9]:
# Number of reviews per value of the `user_sex` column.
fig_sex = px.histogram(
    df,
    x='user_sex',
    title='Distribution of User Gender',
)
fig_sex.show()

In [10]:
# Histogram of review counts along the `date` column.
fig_time = px.histogram(
    df,
    x='date',
    opacity=0.7,
    title='Count of Reviews Over Time',
)
fig_time.show()

In [11]:
# Share of reviews naming each character as the favourite, in percent.
# value_counts(normalize=True) yields fractions in [0, 1]; scale by 100 so the
# plotted numbers match the "Percentage" title and axis label.
character_share = (
    df['favourite_character']
    .value_counts(normalize=True)
    .mul(100)
    # Explicit column names keep the axis labels independent of how the
    # installed pandas version names the value_counts() index and values.
    .rename_axis('Character')
    .reset_index(name='Percentage')
)
fig_characters = px.bar(character_share,
                        x='Character',
                        y='Percentage',
                        title='Percentage of Favorite Characters')
fig_characters.show()

In [12]:
# Favourite-character counts among reviews whose user_sex is 'female'.
female_df = df[df['user_sex'] == 'female']
favorite_characters_female = female_df['favourite_character'].value_counts()

# Plot from a frame with explicit column names: relabelling via the implicit
# 'index' key only works when the value_counts() index is unnamed, which
# depends on the installed pandas version.
female_character_counts = (
    favorite_characters_female
    .rename_axis('Character')
    .reset_index(name='Count')
)
fig_favorite_characters_female = px.bar(female_character_counts,
                                        x='Character',
                                        y='Count',
                                        title='Favorite Characters Among Females')
fig_favorite_characters_female.show()

In [13]:
# Favourite-character counts among reviews whose user_sex is 'male'.
male_df = df[df['user_sex'] == 'male']
favorite_characters_male = male_df['favourite_character'].value_counts()

# Plot from a frame with explicit column names: relabelling via the implicit
# 'index' key only works when the value_counts() index is unnamed, which
# depends on the installed pandas version.
male_character_counts = (
    favorite_characters_male
    .rename_axis('Character')
    .reset_index(name='Count')
)
fig_favorite_characters_male = px.bar(male_character_counts,
                                      x='Character',
                                      y='Count',
                                      title='Favorite Characters Among Males')
fig_favorite_characters_male.show()

In [14]:
# Number of reviews per country, as returned by value_counts().
df_country = df[['user_country']]
reviews_per_country = df_country['user_country'].value_counts()

# Keep only the first five entries of that tally.
top_5_countries = reviews_per_country.iloc[:5]

# Pie chart: one slice per country, sized by its review count.
fig = px.pie(
    top_5_countries,
    names=top_5_countries.index,
    values=top_5_countries.values,
    title='Top 5 Countries with Most Reviews',
)
fig.show()


In [15]:
# Word cloud built from the text of every review comment.
# Drop missing comments and coerce the rest to str so ' '.join() cannot raise
# a TypeError on NaN or other non-string entries.
comment_words = ' '.join(df['comment'].dropna().astype(str))
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(comment_words)

# Titled like the other figures so it can be read on its own.
fig_comments = px.imshow(wordcloud, title='Most Frequent Words in Review Comments')
# The axes only carry pixel coordinates of the rendered image, so hide them.
fig_comments.update_xaxes(visible=False)
fig_comments.update_yaxes(visible=False)
fig_comments.show()

> ## Your time spent exploring this notebook is valued! ˎˊ˗⋆｡°✩📄
> ## If you've found it insightful and beneficial, please consider giving it an Upvote. Don't hesitate to share your analysis ideas in the comments below. Until next time, happy analyzing!