### Import necessary libraries

In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
sys.path.append(r'../func')
import combine_obj
import export_obj

### Load the necessary data from CSV files as DataFrames

In [2]:
# Load the movie table. Forward slashes keep the relative path portable across
# operating systems and avoid backslash sequences such as "\H" and "\M", which
# are invalid escapes inside a regular (non-raw) string literal.
movie_df = pd.read_csv('../Harry Potter-20220610T024151Z-001/Movies.csv', sep=',', quotechar='"')
movie_df.head(2)

Unnamed: 0,Movie ID,Movie Title,Release Year,Runtime,Budget,Box Office
0,1,Harry Potter and the Philosopher's Stone,2001,152,"$125,000,000","$1,002,000,000"
1,2,Harry Potter and the Chamber of Secrets,2002,161,"$100,000,000","$880,300,000"


In [3]:
# Load the chapter table (the file is read as Latin-1). Forward slashes keep the
# relative path portable across operating systems and avoid backslash sequences
# such as "\H" and "\C", which are invalid escapes inside a regular string literal.
chapter_df = pd.read_csv('../Harry Potter-20220610T024151Z-001/Chapters.csv', sep=',', quotechar='"', encoding='Latin-1')
chapter_df.head(2)

Unnamed: 0,Chapter ID,Chapter Name,Movie ID,Movie Chapter
0,1,Doorstep Delivery,1,1
1,2,The Vanishing Glass,1,2


In [4]:
# Load the dialogue table (the file is read as Latin-1). Forward slashes keep the
# relative path portable across operating systems and avoid backslash sequences
# such as "\H" and "\D", which are invalid escapes inside a regular string literal.
dialogue_df = pd.read_csv('../Harry Potter-20220610T024151Z-001/Dialogue.csv', sep=',', quotechar='"', encoding='Latin-1')
dialogue_df.head(2)

Unnamed: 0,Dialogue ID,Chapter ID,Place ID,Character ID,Dialogue
0,1,1,8,4,I should have known that you would be here...P...
1,2,1,8,7,"Good evening, Professor Dumbledore. Are the ru..."


In [5]:
# Load the place table (the file is read as Latin-1). Forward slashes keep the
# relative path portable across operating systems and avoid backslash sequences
# such as "\H" and "\P", which are invalid escapes inside a regular string literal.
place_df = pd.read_csv('../Harry Potter-20220610T024151Z-001/Places.csv', sep=',', quotechar='"', encoding='Latin-1')
place_df.head(2)

Unnamed: 0,Place ID,Place Name,Place Category
0,1,Flourish & Blotts,Diagon Alley
1,2,Gringotts Wizarding Bank,Diagon Alley


### Analyze

#### Generate basic data

In [6]:
# Enrich every dialogue row with its place and chapter attributes.
# Both joins are left joins, so dialogue rows without a match are kept.
dialogue_place_df = (
    dialogue_df
    .merge(place_df, how='left', on='Place ID')
    .merge(chapter_df, how='left', on='Chapter ID')
)
dialogue_place_df.head(2)

Unnamed: 0,Dialogue ID,Chapter ID,Place ID,Character ID,Dialogue,Place Name,Place Category,Chapter Name,Movie ID,Movie Chapter
0,1,1,8,4,I should have known that you would be here...P...,4 Privet Drive,Dwellings,Doorstep Delivery,1,1
1,2,1,8,7,"Good evening, Professor Dumbledore. Are the ru...",4 Privet Drive,Dwellings,Doorstep Delivery,1,1


#### Dialogue count for each place per movie

In [7]:
# Count the dialogue rows for every (movie, place) pair and expose the result
# as a flat frame with a 'Dialogue count' column.
movie_place_groups = dialogue_place_df.groupby(['Movie ID', 'Place ID'])
groupBy_movie_df = movie_place_groups['Movie ID'].count().reset_index(name='Dialogue count')
groupBy_movie_df.head(2)

Unnamed: 0,Movie ID,Place ID,Dialogue count
0,1,2,14
1,1,4,10


In [8]:
# Boolean mask: True where a (movie, place) row holds the largest dialogue count
# of its movie. Places that tie for the maximum are all kept.
# The aggregation is given by name ('max'); passing the Python builtin `max`
# to transform is deprecated in recent pandas releases.
idx = groupBy_movie_df.groupby(['Movie ID'])['Dialogue count'].transform('max') == groupBy_movie_df['Dialogue count']
max_df = groupBy_movie_df[idx]
# Attach the movie and place attributes to the winning rows (left joins keep every winner).
max_df = max_df.merge(movie_df, how='left', on='Movie ID')
max_df = max_df.merge(place_df, how='left', on='Place ID')
max_df

Unnamed: 0,Movie ID,Place ID,Dialogue count,Movie Title,Release Year,Runtime,Budget,Box Office,Place Name,Place Category
0,1,36,100,Harry Potter and the Philosopher's Stone,2001,152,"$125,000,000","$1,002,000,000",Great Hall,Hogwarts
1,2,8,96,Harry Potter and the Chamber of Secrets,2002,161,"$100,000,000","$880,300,000",4 Privet Drive,Dwellings
2,3,40,154,Harry Potter and the Prisoner of Azkaban,2004,142,"$130,000,000","$796,700,000",Hagrid's Hut,Hogwarts
3,4,36,126,Harry Potter and the Goblet of Fire,2005,157,"$150,000,000","$896,400,000",Great Hall,Hogwarts
4,5,70,233,Harry Potter and the Order of the Phoenix,2007,138,"$150,000,000","$942,000,000",Ministry of Magic,Other Magical Locations
5,6,49,144,Harry Potter and the Half-Blood Prince,2009,153,"$250,000,000","$943,200,000",Potions Classroom,Hogwarts
6,7,68,179,Harry Potter and the Deathly Hallows Part 1,2010,146,"$200,000,000","$976,900,000",Forest of Dean,Other Magical Locations
7,8,66,102,Harry Potter and the Deathly Hallows Part 2,2011,130,"$250,000,000","$1,342,000,000",Viaduct Courtyard,Hogwarts


In [9]:
# Prepare chart data: one horizontal bar per movie, annotated with its top location.
chart_title = 'No. of dialogue of the most location per movie'
x_label = 'Movie'
# NOTE(review): combine_2_series presumably builds one label per movie from title
# and release year -- confirm against the combine_obj module.
x = combine_obj.combine_2_series(max_df['Movie Title'], max_df['Release Year'])
annotation_text = max_df['Place Name']
y1_label = 'Dialogue count'
y1 = max_df['Dialogue count']
# Create a single-panel figure (no secondary y-axis is requested here).
fig = make_subplots()
# Add the bar trace. With orientation='h' the counts run along the horizontal
# axis and the movie labels along the vertical axis; the place name is drawn
# inside each bar.
fig.add_trace(
    go.Bar(x=y1, y=x,name=x_label, width=.7, orientation='h', text=annotation_text, insidetextanchor="start")
)
# Add the figure title and axis titles so the chart can be read on its own.
# Because the bars are horizontal, the count label belongs to the x-axis and
# the movie label to the y-axis.
fig.update_layout(
    title_text=chart_title,
    xaxis_title=y1_label,
    yaxis_title=x_label,
    barmode='stack',
    font=dict(size=14),
    bargap=0.5
)

fig.show()

In [10]:
# Hand the figure to the project's HTML export helper. The chart id doubles as
# the output file name under ../docs.
char_id = 'e4340fd8-4c1d-4645-884f-5c14aecf902e'

# Reader-facing explanation of how to read the chart.
chart_description = [
    {'This chart displays the most popular location in each film': [
        'The x-axis will indicate how many times that the location is repeated',
        'The y-axis will be the movie name',
        'The name is inside of the bar will be the location name'
    ]},
    'The year is adjacent to the movie name is the release year'
]

# NOTE(review): the first insight is about a character rather than a location;
# it may have been carried over from another chart -- confirm it belongs here.
chart_insights = [
    'Harry Potter has the highest number of dialogues in every movies => answering for the question 3',
    '"Great Hall" is the only location mentioned most in 2 movies "Goblet of Fire" (2005) and "Philosopher\'s Stone" (2001)'
]

export_obj.export_chart_to_html(
    fig=fig,
    height=600,
    chart_id=char_id,
    chart_title=chart_title,
    path_to_filename=f'../docs/{char_id}.html',
    describtion_list=chart_description,
    insight_list=chart_insights
)