# Task
## Part 1
Write a script that downloads tweets data on a specific search topic using the standard search API. The script should contain the following functions: 
1. scrape_tweets() that has the following parameters:
    1. Search topic
    2. The number of tweets to download per request
    3. The number of requests  
And returns a dataframe.
2. save_results_as_csv() that has the following parameters:
    1. the dataframe from the above function  
    And returns a csv file with the following naming format:
    
    *tweets_downloaded_yymmdd_hhmmss.csv (where ‘yymmdd_hhmmss’ is the current timestamp)*

The following attributes of the tweets should be extracted:
* Tweet text
* Tweet id
* Source
* Coordinates
* Retweet count
* Likes count
* User info
    - Username
    - Screenname
    - Location
    - Friends count
    - Verification status
    - Description
    - Followers count

Make sure to not include retweets.  
Make sure you don't have the same tweets appearing multiple times in your final csv.

## Part 2
Create a MongoDB database called Tweets_db and store the extracted tweets into a collection named: raw_tweets.


Relevant resources:  
Twitter API docs: https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets  
Tweepy docs: http://docs.tweepy.org/en/latest/api.html  
Installing mongoDB locally: https://docs.mongodb.com/manual/administration/install-community/  
Creating CRUD applications for MongoDB with python: https://www.mongodb.com/blog/post/getting-started-with-python-and-mongodb


## Install Packages

In [None]:
# Package to manipulate env file
!pip install python-decouple

# Tweepy
!pip install tweepy

# Pymongo. MongoDB driver for working with MongoDB
!pip3 install pymongo[srv]

## Import Packages

In [None]:
import os
import time
from decouple import config
import tweepy
import pandas as pd
import json
from pymongo import MongoClient

print("Packages imported successfully.")

## Retrieve API access details from .env

In [None]:
# Load the four Twitter API credentials through decouple's config() so the
# secrets live in the .env file instead of being hard-coded in the notebook.
# The names below are used by the authentication cell.
consumer_key = config('API-KEY')
consumer_secret = config('API-SECRET-KEY')
access_token = config('ACCESS-TOKEN')
access_token_secret = config('ACCESS-TOKEN-SECRET')

print("API access details retrieved successfully.")

## Authenticating User

In [None]:
# Build the OAuth handler from the consumer key/secret and attach the
# user's access token so requests are made on behalf of that account.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# wait_on_rate_limit=True asks tweepy to wait for the rate limit to replenish
# instead of failing when the limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True)

# NOTE(review): this only checks that an API object was created; it does not
# appear to contact Twitter, so invalid credentials would still pass here.
# Consider calling api.verify_credentials() if a real check is needed.
if not api:
    print("Authentication failed!")
    # `sys` is not imported in this notebook, so the original sys.exit(-1)
    # would raise NameError. Raising SystemExit directly is equivalent.
    raise SystemExit(-1)

print("Authentication successful.")

## Scrape data and store in a DataFrame

In [None]:
def scrape_tweets(api_obj: object, query: str, tweets_per_request: int, max_requests: int) -> pd.DataFrame:
    """Download original (non-retweet) English tweets matching *query*.

    A single paginated cursor is walked for at most ``max_requests`` pages,
    each page asking the search endpoint for ``tweets_per_request`` tweets.
    Creating a fresh cursor per request (as was done previously) restarts
    the search from the newest tweet every time and therefore returns the
    same tweets repeatedly.

    Args:
        api_obj: An authenticated ``tweepy.API`` instance.
        query: The search topic.
        tweets_per_request: Number of tweets to ask for per request (sent as
            the ``count`` parameter; the standard search API caps it at 100).
        max_requests: Maximum number of requests (pages) to fetch.

    Returns:
        A DataFrame with one row per unique tweet and the columns
        ``tweet, id, source, coordinates, retweetCount, likeCount, username,
        screenName, location, friendsCount, verificationStatus, description,
        followersCount``. The frame is empty (columns only) when either
        numeric argument is not positive.
    """
    columns = [
        'tweet', 'id', 'source', 'coordinates', 'retweetCount', 'likeCount',
        'username', 'screenName', 'location', 'friendsCount',
        'verificationStatus', 'description', 'followersCount',
    ]

    # tweepy treats a page/item limit of 0 as "no limit", so guard explicitly
    # to keep "zero requests" meaning "download nothing".
    if tweets_per_request <= 0 or max_requests <= 0:
        return pd.DataFrame(columns=columns)

    # tweepy >= 4 renamed API.search to API.search_tweets; accept either.
    search = getattr(api_obj, 'search_tweets', None) or api_obj.search
    cursor = tweepy.Cursor(
        search,
        q=query,
        lang='en',
        tweet_mode='extended',
        count=tweets_per_request,
    )

    rows = []
    seen_ids = set()
    for page in cursor.pages(max_requests):
        for tweet in page:
            # Retweets carry a `retweeted_status` attribute; skip them, and
            # skip any tweet that has already been collected.
            if hasattr(tweet, 'retweeted_status') or tweet.id_str in seen_ids:
                continue
            seen_ids.add(tweet.id_str)

            user = tweet.user
            rows.append([
                tweet.full_text,
                tweet.id_str,
                tweet.source,
                tweet.coordinates,
                tweet.retweet_count,
                # Likes received by this tweet. user.favourites_count is the
                # number of tweets the *author* has liked.
                tweet.favorite_count,
                user.name,
                user.screen_name,
                user.location,
                user.friends_count,
                user.verified,
                user.description,
                user.followers_count,
            ])

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


In [None]:
# Example run: search topic, tweets per request and number of requests
# passed to scrape_tweets(); `response` holds the returned DataFrame and is
# reused by the cells below.
query = "Messi"
tweets_no = 50
max_requests = 2

response = scrape_tweets(api, query, tweets_no, max_requests)

# Number of rows (tweets) in the returned DataFrame.
print(len(response))

In [None]:
# Preview the first two scraped rows.
response.head(2) 

In [None]:
def save_results_as_csv(df):
    """Write *df* to ``<cwd>/data/tweets_downloaded_yymmdd_hhmmss.csv``.

    The ``data`` directory is created under the current working directory if
    it does not exist. The path is built with ``os.path.join`` so it works on
    every platform; the previous hard-coded ``'\\data\\'`` suffix only formed
    a sub-directory on Windows.

    Args:
        df: The DataFrame to save (written without the index column).

    Returns:
        The full path of the CSV file that was written.
    """
    output_dir = os.path.join(os.getcwd(), 'data')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(output_dir, exist_ok=True)

    current_timestamp = time.strftime("%y%m%d_%H%M%S")
    filename = 'tweets_downloaded_' + current_timestamp + '.csv'
    fullname = os.path.join(output_dir, filename)

    df.to_csv(fullname, index=False)
    return fullname

In [None]:
# Persist the scraped tweets to a timestamped CSV file.
save_results_as_csv(response)

## Store data in MongoDB

In [None]:
# MongoDB database name and password, read through decouple's config() so
# they are not hard-coded; both are used by store_data_in_mongodb() below.
db_name = config('DB-NAME')
db_password = config('DB-PASSWORD')

def store_data_in_mongodb(df):
    """Insert every row of *df* into the ``raw_tweets`` MongoDB collection.

    Connects to the cluster with the module-level ``db_name`` and
    ``db_password`` settings, inserts one document per row and closes the
    connection again.

    Args:
        df: DataFrame of tweets; each row becomes one document.

    Returns:
        The number of documents inserted (0 when *df* has no rows).
    """
    from urllib.parse import quote_plus

    # Round-trip through JSON so pandas/numpy values (NaN, numpy ints, ...)
    # become plain JSON-compatible Python types.
    records = json.loads(df.to_json(orient='records'))

    # insert_many() rejects an empty list, so there is nothing to do and no
    # reason to open a connection.
    if not records:
        return 0

    # Credentials embedded in a MongoDB URI must be percent-escaped, otherwise
    # characters such as '@', ':' or '/' in the password break URI parsing.
    uri = 'mongodb+srv://kingsabru:{0}@cluster0.cz8qq.gcp.mongodb.net/{1}?retryWrites=true&w=majority'.format(
        quote_plus(db_password), db_name
    )

    client = MongoClient(uri)
    try:
        coll = client.get_database(db_name).raw_tweets
        result = coll.insert_many(records)
    finally:
        # Always release the connection pool, even if the insert fails.
        client.close()

    return len(result.inserted_ids)

In [None]:
# Store the scraped tweets in the raw_tweets MongoDB collection.
store_data_in_mongodb(response)