In [1]:
# For web scraping
import requests
from bs4 import BeautifulSoup
import selenium
from selenium import webdriver
import tweepy

# Data Storage
import sqlite3

# For data manipulation
import pandas as pd
import numpy as np
import dask
import dask.dataframe as dd

# For data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# For advanced data manipulation
from scipy import stats

# For machine learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# For working with APIs
import json

# For datetime operations
from datetime import datetime

# Additional utilities
import os
import re
import json
import requests
import time
import zipfile

# For time series analysis
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

# For real-time data streaming
import websocket
import socket

# For asynchronous programming (useful for real-time data processing)
import asyncio


# Step 2: Data Collection

## 2.1 Twitter API Access
Apply for Access: If you haven't already, apply for a Twitter Developer account at developer.twitter.com. You'll need to explain your project's purpose and how you'll use the data.
Create an Application: Once approved, create a new application in the Twitter Developer portal to get your API keys and tokens — these are necessary for accessing the Twitter API.

## 2.2 Understand Twitter API Limitations
Rate Limits: Familiarize yourself with the Twitter API rate limits to avoid hitting the cap on the number of requests.
Data Availability: The standard search API only returns tweets from the last 7 days; the premium and enterprise tiers provide access to more historical data.

## 2.3 Develop Data Collection Script
Install Tweepy: Use the Python library tweepy for interacting with the Twitter API. Install it via pip (pip install tweepy).
Authentication: Use your API keys and tokens to authenticate your requests.
Querying Tweets: Write a function to query tweets based on the product or brand name inputted by the user. Use query parameters effectively to filter and retrieve relevant tweets.
Handling Rate Limiting: Implement logic to handle rate limiting by the Twitter API, such as waiting and retrying after a certain period.

## 2.4 Store Collected Data
Temporary Storage: Initially, you might store the tweets in a temporary data structure like a list or a Pandas DataFrame.
Database Storage: For long-term storage and retrieval, consider saving the tweets to a database. Choose between SQL and NoSQL based on your preference and the data's structure.

## 2.5 Error Handling
API Errors: Implement error handling for issues like network errors, API rate limits, or invalid responses.
Data Quality Checks: Put checks in place to ensure the quality of the data collected (e.g., filtering out irrelevant or spammy tweets).

In [2]:
# Twitter API credentials
# Secrets are read from environment variables instead of being hardcoded, so they
# are never saved into the notebook file or committed to version control.
# NOTE: any credentials that were previously pasted into this cell should be
# treated as leaked; revoke and regenerate them in the Twitter Developer portal.
def _read_twitter_credential(env_name, required=True):
    """Return the value of the environment variable ``env_name``.

    Args:
        env_name: Name of the environment variable holding the credential.
        required: When True, a missing or empty variable raises RuntimeError.

    Returns:
        The credential string, or None when an optional variable is not set.
    """
    value = os.environ.get(env_name)
    if required and not value:
        raise RuntimeError(
            f"Missing Twitter credential: set the {env_name} environment variable."
        )
    return value or None


# Required for the OAuth 1.0a user-context authentication performed below
api_key = _read_twitter_credential('TWITTER_API_KEY')
api_secret_key = _read_twitter_credential('TWITTER_API_SECRET_KEY')
access_token = _read_twitter_credential('TWITTER_ACCESS_TOKEN')
access_token_secret = _read_twitter_credential('TWITTER_ACCESS_TOKEN_SECRET')

# Not used by the authentication below; kept (as optional values) so the names
# stay defined for any later cell that refers to them
bearer_token = _read_twitter_credential('TWITTER_BEARER_TOKEN', required=False)
client_id = _read_twitter_credential('TWITTER_CLIENT_ID', required=False)
client_id_secret = _read_twitter_credential('TWITTER_CLIENT_SECRET', required=False)

# Authenticate
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit=True is passed so tweepy handles rate-limit waits itself
api = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
def search_tweets(query, max_tweets):
    """Return the text of up to ``max_tweets`` English tweets matching ``query``.

    Pages through ``api.search_tweets`` with a tweepy ``Cursor`` in extended
    tweet mode and collects each result's ``full_text``.

    Args:
        query: Search string passed to the API as ``q``.
        max_tweets: Item limit handed to ``Cursor.items``.

    Returns:
        A list of tweet texts (strings).
    """
    cursor = tweepy.Cursor(
        api.search_tweets,
        q=query,
        lang="en",
        tweet_mode='extended',
    )
    return [status.full_text for status in cursor.items(max_tweets)]

# Example run: fetch a small sample of tweets mentioning the product
product_name = "ChatGPT"
max_tweets = 50
tweets_about_product = search_tweets(query=product_name, max_tweets=max_tweets)

# Print the fetched tweets, one per line (nothing is printed when the list is empty)
if tweets_about_product:
    print(*tweets_about_product, sep="\n")

In [None]:
# Storing the tweets in a database

def create_database(db_path='twitter_data.db'):
    """Create the SQLite database file and its ``tweets`` table if missing.

    The table has four columns: ``id`` (auto-incrementing primary key),
    ``tweet_text``, ``query`` and ``created_at`` (defaults to the insertion
    time via CURRENT_TIMESTAMP). Calling this function again is harmless
    because the table is created with IF NOT EXISTS.

    Args:
        db_path: Path of the SQLite file; it is created if it doesn't exist.
            Defaults to 'twitter_data.db'.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table created.
    """
    # Connect to SQLite database (it will be created if it doesn't exist)
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back on error; it does NOT
        # close the connection, hence the explicit close() in `finally`.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS tweets
                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
                  tweet_text TEXT,
                  query TEXT,
                  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
    finally:
        # Always release the file handle, even when the statement fails
        conn.close()

# Make sure the tweets table exists before the store/fetch helpers below are used
create_database()

def store_tweet(tweet_text, query, db_path='twitter_data.db'):
    """Insert one tweet into the ``tweets`` table.

    Args:
        tweet_text: Text of the tweet to store.
        query: Search query the tweet was fetched for.
        db_path: Path of the SQLite file. Defaults to 'twitter_data.db'.

    Raises:
        sqlite3.Error: If the insert fails (for example, the table is missing).
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits the insert on success and rolls back on error
        with conn:
            # Placeholders keep tweet text (quotes, etc.) from breaking the SQL
            conn.execute(
                "INSERT INTO tweets (tweet_text, query) VALUES (?, ?)",
                (tweet_text, query),
            )
    finally:
        # Close the connection even if the insert raised
        conn.close()
    
# For fetching the data in the database later
def get_tweets_by_query(query, db_path='twitter_data.db'):
    """Return the stored tweets that were saved for ``query``.

    Args:
        query: Search query to match against the ``query`` column (exact match).
        db_path: Path of the SQLite file. Defaults to 'twitter_data.db'.

    Returns:
        A list of 1-tuples ``(tweet_text,)`` as produced by ``fetchall()``;
        an empty list when nothing matches.

    Raises:
        sqlite3.Error: If the select fails (for example, the table is missing).
    """
    conn = sqlite3.connect(db_path)
    try:
        # Select tweets that match the query
        cur = conn.execute("SELECT tweet_text FROM tweets WHERE query=?", (query,))
        return cur.fetchall()
    finally:
        # Close the connection on both the success and the error path
        conn.close()


# Step 3: Data Processing
3.1 Clean and Preprocess Data
Implement functions to clean tweets (removing URLs, mentions, hashtags, and special characters).
Normalize text data (like converting to lowercase, removing punctuation).

Remove URLs: URLs in tweets can be removed as they usually don't contribute to sentiment analysis.
Remove Mentions and Hashtags: Mentions (@usernames) and hashtags (#hashtag) can be removed or kept, depending on your analysis requirements.
Remove Special Characters and Numbers: Special characters and numbers often don't contribute to sentiment analysis and can be removed.
Convert to Lowercase: Convert all texts to lowercase to maintain consistency.

3.2 Data Storage
Decide on how you'll store the fetched tweets (e.g., in a database or files).
Implement the storage mechanism in your script.

In [7]:
import re

def clean_tweet(tweet):
    """Strip noise from a tweet and return it lowercased.

    Removes, in this order: URLs, @mentions, #hashtags, and every character
    that is not an ASCII letter or whitespace. Whitespace left behind by the
    removals is not collapsed.
    """
    removal_patterns = (
        r'http\S+',       # URLs
        r'@\S+',          # mentions
        r'#\S+',          # hashtags
        r'[^A-Za-z\s]',   # special characters and numbers
    )
    cleaned = tweet
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned.lower()

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

# Download the NLTK resources this notebook requests: the stop-word list and WordNet
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

# English stop words, held in a set for the membership checks in remove_stopwords
stop_words = set(stopwords.words('english'))

# Shared lemmatizer instance used by lemmatize_tweet
lemmatizer = WordNetLemmatizer()

def lemmatize_tweet(tweet):
    """Lemmatize every whitespace-separated word of ``tweet``.

    Each word is passed to the module-level ``lemmatizer`` and the results
    are re-joined with single spaces.
    """
    return ' '.join(lemmatizer.lemmatize(token) for token in tweet.split())

def remove_stopwords(tweet):
    """Drop every word of ``tweet`` that appears in the ``stop_words`` set.

    The tweet is split on whitespace; the remaining words are re-joined
    with single spaces.
    """
    return ' '.join(token for token in tweet.split() if token not in stop_words)

def preprocess_tweet(tweet):
    """Run the full text-preparation pipeline on one raw tweet.

    Steps: clean the text, drop stop words, lemmatize, then drop stop words
    again.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned, stop-word-free, lemmatized text as a single string.
    """
    tweet = clean_tweet(tweet)
    # Filter stop words BEFORE lemmatizing: the default (noun) WordNet
    # lemmatizer can turn stop words into tokens that are no longer in the
    # stop-word list (e.g. "was" -> "wa", "has" -> "ha"), so filtering only
    # afterwards lets them leak into the output.
    tweet = remove_stopwords(tweet)
    tweet = lemmatize_tweet(tweet)
    # Second pass keeps the previous behavior of also dropping words whose
    # lemma is itself a stop word.
    tweet = remove_stopwords(tweet)
    return tweet

# Step 4: Sentiment Analysis
4.1 Choose Sentiment Analysis Method
Decide whether to use a pre-built sentiment analysis tool or to train your own model.
If using pre-built tools, test them to see which fits your needs best.

4.2 Implement Sentiment Analysis
Integrate the sentiment analysis into your data processing pipeline.
Test the accuracy and adjust as necessary.

# Step 5: Building the Web Interface
5.1 Design the UI
Sketch a basic layout of the web interface.
Decide on the functionalities (e.g., search input, results display).

5.2 Develop the Web Application
Use Flask or Django to build the web interface.
Implement the front-end to allow users to input a brand or product name.
Connect the back-end to handle requests and display results.

# Step 6: Visualization and Reporting
6.1 Implement Data Visualization
Use libraries like Matplotlib or Plotly to create visualizations of the sentiment analysis.
Integrate these visualizations into your web interface.

# Step 7: Testing and Validation
7.1 Testing
Conduct thorough testing of your application, covering edge cases.
Test the system's performance and accuracy.

7.2 User Testing
Get feedback from potential users on usability and functionality.

# Step 8: Deployment and Maintenance
8.1 Choose a Deployment Platform
Decide where to host your web application (e.g., Heroku, AWS, Google Cloud).

8.2 Deploy the Application
Deploy your web application.
Ensure it's running smoothly and is able to handle multiple users.

8.3 Plan for Maintenance
Set up a system for regular maintenance and updates.