In [78]:
import PyPDF2
import pandas as pd
from textblob import TextBlob
import plotly.graph_objects as go
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [79]:
# create plotly gauge chart for sentiment
def sentiment_gauge(sentiment_polarity, file):
    """Show a Plotly gauge for one document's sentiment polarity.

    The dial spans -1 to 1 and is shaded red below -0.5, light grey between
    -0.5 and 0.5, and light green above 0.5. A red threshold line is drawn at
    the polarity value itself.

    Parameters
    ----------
    sentiment_polarity
        Value displayed as the gauge reading and threshold position.
    file
        Label interpolated into the chart title.
    """
    # Background bands of the dial, from negative to positive
    colour_bands = [
        {'range': [-1, -0.5], 'color': "red"},
        {'range': [-0.5, 0.5], 'color': "lightgrey"},
        {'range': [0.5, 1], 'color': "lightgreen"},
    ]

    # Marker line drawn at the measured value
    value_marker = {
        'line': {'color': "red", 'width': 4},
        'thickness': 0.75,
        'value': sentiment_polarity,
    }

    dial = go.Indicator(
        mode="gauge+number",
        value=sentiment_polarity,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': f"Sentiment Polarity of {file}"},
        gauge={
            'axis': {'range': [-1, 1]},
            'steps': colour_bands,
            'threshold': value_marker,
        },
    )

    fig = go.Figure(dial)
    # Fixed canvas size and a larger font for readability
    fig.update_layout(width=900, height=900, font=dict(size=18))
    fig.show()



In [80]:
import plotly.graph_objects as go

def sentimentBARChart(sentimentTable: pd.DataFrame):
    """Show a bar chart of sentiment scores, one bar per file.

    Reads the 'file' column for the x-axis and the 'sentiment' column for the
    bar heights. Bars with a score >= 0 are green; all others are red. The
    y-axis is fixed to the range [-1, 1].
    """
    file_labels = sentimentTable['file']
    polarity_values = sentimentTable['sentiment']

    # One colour per bar, chosen by the sign of its score
    bar_colours = []
    for value in polarity_values:
        bar_colours.append('green' if value >= 0 else 'red')

    bars = go.Bar(x=file_labels, y=polarity_values, marker=dict(color=bar_colours))
    base_layout = go.Layout(
        title='Sentiment Analysis',
        yaxis_title='Sentiment Score',
        yaxis=dict(range=[-1, 1]),
    )

    fig = go.Figure(data=[bars], layout=base_layout)
    # Fixed canvas size and a larger font for readability
    fig.update_layout(width=1200, height=1200, font=dict(size=18))

    fig.show()



In [81]:
def sentimentPDF(filePath: str):
    """Compute the overall TextBlob sentiment of a PDF and show it on a gauge.

    The text of every page is extracted, the pages are joined with newlines,
    and the combined text is scored with TextBlob. The polarity and
    subjectivity are printed, the polarity is drawn with ``sentiment_gauge``,
    and the polarity is returned.

    Parameters
    ----------
    filePath : str
        Path to the PDF file. The part after the last '/' is used as the
        label on the gauge.

    Returns
    -------
    The sentiment polarity (-1 to 1) of the whole document.
    """
    fileName = filePath.split('/')[-1]

    # Open the PDF file in read-binary mode and pull the text of each page.
    with open(filePath, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` keeps a page with no extractable text from breaking the join
        # should the extractor hand back None instead of an empty string.
        page_texts = [page.extract_text() or '' for page in reader.pages]

    # Joining the page texts with '\n' yields the same string as splitting
    # every page into lines and re-joining them, without growing a DataFrame
    # page by page.
    document_text = '\n'.join(page_texts)

    # Overall sentiment: polarity (-1 to 1) and subjectivity (0 to 1)
    sentiment = TextBlob(document_text).sentiment
    sentiment_polarity = sentiment.polarity
    sentiment_subjectivity = sentiment.subjectivity

    print("Sentiment Polarity:", sentiment_polarity)
    print("Sentiment Subjectivity:", sentiment_subjectivity)
    sentiment_gauge(sentiment_polarity, fileName)

    return sentiment_polarity


In [82]:
def sentimentPDFdata(files: list, scorer=None):
    """Score several files and collect the results in one DataFrame.

    Parameters
    ----------
    files : list
        Paths of the files to score. The part after the last '/' of each path
        is stored in the 'file' column.
    scorer : callable, optional
        Function taking a path and returning its sentiment score. Defaults to
        ``sentimentPDF`` (which also prints and plots each result).

    Returns
    -------
    pandas.DataFrame
        One row per input path, with columns 'file' and 'sentiment', in the
        same order as ``files``. Empty (but with both columns) when ``files``
        is empty.
    """
    if scorer is None:
        # Resolved at call time so the default stays the notebook's scorer.
        scorer = sentimentPDF

    # Build all rows first and create the frame once, instead of
    # concatenating onto an empty DataFrame on every iteration.
    records = [
        {'file': path.split('/')[-1], 'sentiment': scorer(path)}
        for path in files
    ]

    return pd.DataFrame(records, columns=['file', 'sentiment'])


In [83]:
# Ireland set under Data/sentiment/homeBuilding: score each PDF, chart the
# scores, then display the resulting table.
filesIreland = [
    'Data/sentiment/homeBuilding/246610_5161c6c9-559c-4b56-93ad-fe894e7ee4e0.pdf',
    'Data/sentiment/homeBuilding/Construction Sector Performance and Capacity.pdf',
    'Data/sentiment/homeBuilding/PII Housing Review September 2022.pdf',
    'Data/sentiment/homeBuilding/SCSI_ResidentialPropertyReport2022_Final.pdf',
]
sentimentIreland_df = sentimentPDFdata(filesIreland)
sentimentBARChart(sentimentIreland_df)
sentimentIreland_df


Sentiment Polarity: 0.012417732331525436
Sentiment Subjectivity: 0.4470569405052166


Sentiment Polarity: 0.05436180648027537
Sentiment Subjectivity: 0.38453445066476655


Sentiment Polarity: 0.09926903968351346
Sentiment Subjectivity: 0.4228530341490868


Sentiment Polarity: 0.0357548484352608
Sentiment Subjectivity: 0.3543092684329799


Unnamed: 0,file,sentiment
0,246610_5161c6c9-559c-4b56-93ad-fe894e7ee4e0.pdf,0.012418
1,Construction Sector Performance and Capacity.pdf,0.054362
2,PII Housing Review September 2022.pdf,0.099269
3,SCSI_ResidentialPropertyReport2022_Final.pdf,0.035755


In [84]:
# Europe set under Data/sentiment/homeBuilding/europe: score each PDF, chart
# the scores, then display the resulting table.
filesEurope = [
    'Data/sentiment/homeBuilding/europe/Emerging Trends in Real Estate Europe 2023 Report.pdf',
    'Data/sentiment/homeBuilding/europe/at-property-index-2022-final.pdf',
    'Data/sentiment/homeBuilding/europe/dp171_en.pdf',
]
sentimentEurope_df = sentimentPDFdata(filesEurope)
sentimentBARChart(sentimentEurope_df)
sentimentEurope_df

Sentiment Polarity: 0.11588104585056695
Sentiment Subjectivity: 0.3787373972871106


Sentiment Polarity: 0.09585107129460582
Sentiment Subjectivity: 0.3964008039852869


Sentiment Polarity: 0.09179331721284031
Sentiment Subjectivity: 0.3802362095261567


Unnamed: 0,file,sentiment
0,Emerging Trends in Real Estate Europe 2023 Rep...,0.115881
1,at-property-index-2022-final.pdf,0.095851
2,dp171_en.pdf,0.091793


In [85]:
#compare sentiment of different files in a bar chart

In [86]:
# Ireland set under Data/sentiment/buildingCost/Ireland: score each PDF, chart
# the scores, then display the resulting table.
buildCostfilesIreland = [
    'Data/sentiment/buildingCost/Ireland/2022-05-24_opening-statement-kevin-james-vice-president-society-of-chartered-surveyors-ireland_en.pdf',
    'Data/sentiment/buildingCost/Ireland/256082_afbe94c3-ebf1-4201-9a4a-a6ac9cddc69a.pdf',
    'Data/sentiment/buildingCost/Ireland/Rising-construction-costs-and-the-residential-real-estate-market-in-Ireland.pdf',
]
buildCostsentimentIreland_df = sentimentPDFdata(buildCostfilesIreland)
sentimentBARChart(buildCostsentimentIreland_df)
buildCostsentimentIreland_df

Sentiment Polarity: 0.07566868883195413
Sentiment Subjectivity: 0.3619561013438563


Sentiment Polarity: 0.03933401738823433
Sentiment Subjectivity: 0.324721967590643


Sentiment Polarity: 0.0667232191927314
Sentiment Subjectivity: 0.3585281639092621


Unnamed: 0,file,sentiment
0,2022-05-24_opening-statement-kevin-james-vice-...,0.075669
1,256082_afbe94c3-ebf1-4201-9a4a-a6ac9cddc69a.pdf,0.039334
2,Rising-construction-costs-and-the-residential-...,0.066723


In [87]:
# Europe set under Data/sentiment/buildingCost/Europe: score each PDF, chart
# the scores, then display the resulting table.
buildCostfilesEurope = [
    'Data/sentiment/buildingCost/Europe/22-cs11-_Construction_Building_Materials_-_Commentary_October_2022.pdf',
    'Data/sentiment/buildingCost/Europe/ING-Think-eu-construction-outlook-optimism-among-contractors-despite-increasing-building-material-shortage.pdf',
    'Data/sentiment/buildingCost/Europe/International Construction Costs 2022-2.pdf',
]
buildCostsentimentEurope_df = sentimentPDFdata(buildCostfilesEurope)
sentimentBARChart(buildCostsentimentEurope_df)
buildCostsentimentEurope_df

Sentiment Polarity: 0.07358007154882146
Sentiment Subjectivity: 0.38110142558059285


Sentiment Polarity: 0.07648900740155493
Sentiment Subjectivity: 0.33650689403541134


Sentiment Polarity: 0.11189386577141676
Sentiment Subjectivity: 0.3984357146538268


Unnamed: 0,file,sentiment
0,22-cs11-_Construction_Building_Materials_-_Com...,0.07358
1,ING-Think-eu-construction-outlook-optimism-amo...,0.076489
2,International Construction Costs 2022-2.pdf,0.111894
