# In

In [None]:
from flask import Flask
from flask import request
import joblib
import pandas as pd
# Import libraries
import joblib
import pickle
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression


from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import preprocessor as p

from pymongo import MongoClient

import time
import json

import re
from flask import Flask
from flask import json
from flask import request
from datetime import datetime,timedelta
from elasticsearch import Elasticsearch

# Loading Data

In [None]:
def load_data(messages_filepath, categories_filepath):
    """Read the messages and categories CSV files and join them on ``id``.

    Args:
        messages_filepath: Path of the CSV holding the messages.
        categories_filepath: Path of the CSV holding the category strings.

    Returns:
        A DataFrame with every message row; category columns are attached
        through a left join on ``id``, so messages without a matching
        category row are kept with missing values.
    """
    message_rows = pd.read_csv(messages_filepath)
    category_rows = pd.read_csv(categories_filepath)
    return message_rows.merge(category_rows, how='left', on='id')


def clean_data(df):
    """Expand the ``categories`` string column into numeric indicator columns.

    Each ``categories`` cell is a ``;``-separated list of ``name-digit``
    tokens. The names are taken from the first row; the value of each cell
    is its last character converted to ``int``. Categories that take only a
    single value across the whole frame are discarded.

    Args:
        df: DataFrame containing a ``categories`` column.

    Returns:
        A new DataFrame: ``df`` without ``categories``, followed by the
        retained category columns, with duplicate rows removed.
    """
    # One column per ';'-separated token, named after the part before '-'.
    expanded = df['categories'].str.split(';', expand=True)
    expanded.columns = expanded.iloc[0].str.split('-').str[0].tolist()

    # Keep only categories that actually vary; convert their trailing digit.
    kept = {}
    for name in expanded.columns:
        values = expanded[name]
        if values.nunique() <= 1:
            continue
        kept[name] = values.map(lambda cell: cell[-1]).astype(int)
    numeric = pd.DataFrame(kept, index=expanded.index)

    remainder = df.drop(columns='categories')
    return pd.concat([remainder, numeric], axis=1).drop_duplicates()

In [None]:
# Locations of the two raw CSV inputs (relative to the working directory).
messages_filepath, categories_filepath = (
    'data/disaster_messages.csv',
    'data/disaster_categories.csv',
)
# Merged, still-uncleaned frame of messages and their category strings.
df = load_data(messages_filepath, categories_filepath)


# Cleaning Data

In [None]:
# Progress message for the notebook output.
print('Cleaning data...')
# Expand the category strings into numeric columns and drop duplicate rows.
cleaneddf = clean_data(df)

In [None]:
# Model input: the raw message text.
X = cleaneddf['message'].copy()
# Model targets: every column from position 4 onward.
# NOTE(review): assumes the first four columns are non-label metadata and
# everything after them is a category column — confirm against the CSV schema.
Y = cleaneddf.iloc[:, 4:].copy()
# Preview the target frame (notebook display).
Y.head()

# Model Building

In [None]:
# Text classification pipeline: TF-IDF features feeding one logistic
# regression per target column (via MultiOutputClassifier).
pipeline = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer()),
        (
            'clf',
            MultiOutputClassifier(
                estimator=LogisticRegression(max_iter=1000, random_state=0),
            ),
        ),
    ],
)

In [None]:
# Parameter grid to search
# Hyper-parameter grid for the 'tfidf' pipeline step; keys use the
# '<step>__<param>' naming so they can be routed to that step.
parameters = dict(
    tfidf__max_df=[0.01, 0.1, 0.2],
    tfidf__max_features=[None, 1000, 10000],
    tfidf__ngram_range=[(1, 1), (2, 2), (3, 3)],
)

In [None]:
# Search over every combination in `parameters` with 4-fold cross-validation,
# using 12 parallel jobs and verbose progress output.
gs = GridSearchCV(pipeline, parameters, cv=4, n_jobs=12, verbose=2)
gs.fit(X, Y)

# Show the winning parameter combination (notebook display).
gs.best_params_

In [None]:
# Pipeline fitted with the best parameter combination found by the search.
logreg = gs.best_estimator_

In [None]:
# Persist the tuned pipeline so it can be reloaded without repeating the search.
filename = 'finalized_model_14may.sav'
# A context manager guarantees the file is flushed and closed, even if
# pickle.dump raises part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(logreg, model_file)

In [None]:
# Reload the persisted pipeline from disk.
# NOTE: unpickling can execute arbitrary code; only load model files that
# were produced by a trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Category names used to translate a model output index into a label.
# NOTE(review): the order must match the target columns the model was trained
# on; clean_data() drops categories that never vary, so confirm that none of
# these names was removed — otherwise the indices below are shifted.
labels = [
    # general
    'related', 'request', 'offer',
    # aid
    'aid_related', 'medical_help', 'medical_products', 'search_and_rescue',
    'security', 'military', 'child_alone', 'water', 'food', 'shelter',
    'clothing', 'money', 'missing_people', 'refugees', 'death', 'other_aid',
    # infrastructure
    'infrastructure_related', 'transport', 'buildings', 'electricity',
    'tools', 'hospitals', 'shops', 'aid_centers', 'other_infrastructure',
    # weather
    'weather_related', 'floods', 'storm', 'fire', 'earthquake', 'cold',
    'other_weather',
    # source
    'direct_report',
]

In [None]:
# Smoke test: one entry of pred_probs per target column for a single message.
pred_probs = loaded_model.predict_proba(["comes to the rescue of a COVID-19 positive patient"])
# Despite its name, pred_prob is the INDEX of the target whose second
# probability column (row[0][1], first sample) is largest.
pred_prob = np.argmax([row[0][1] for row in pred_probs])
# Translate that index into a category name (notebook display).
labels[pred_prob]

In [None]:
# Smoke test on a short, informal help request.
pred_probs = loaded_model.predict_proba([": We need Ur help"])
# pred_prob is the index of the highest-scoring target, not a probability.
pred_prob = np.argmax([row[0][1] for row in pred_probs])
# Translate that index into a category name (notebook display).
labels[pred_prob]

In [None]:
# Smoke test on a medical-supply request.
pred_probs = loaded_model.predict_proba(["Need Blood and Need plasma From Covid recovered patientAt"])
# pred_prob is the index of the highest-scoring target, not a probability.
pred_prob = np.argmax([row[0][1] for row in pred_probs])
# Translate that index into a category name (notebook display).
labels[pred_prob]

# ------------ APIS ----------------

In [1]:
import pickle
import pandas as pd
import numpy as np
import re
from flask import Flask
from flask import json
from flask import request
from datetime import datetime,timedelta
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

In [3]:

import pickle
import pandas as pd
import numpy as np
import re
from flask import Flask
from flask import json
from flask import request
from datetime import datetime,timedelta
from elasticsearch import Elasticsearch
# Elasticsearch client shared by the data and search endpoints.
# NOTE(review): the cluster address is hard-coded; consider moving it to
# configuration.
es = Elasticsearch(hosts=["http://3.238.229.207:9200/"])
# Flask application that the @app.route decorators register handlers on.
app = Flask(__name__)

# Pickled classifier pipeline served by the prediction endpoint.
filename = 'finalized_model_14may.sav'

# Category names used to translate a model output index into a label.
# NOTE(review): the order must match the target columns the pickled model was
# trained on — confirm before relying on the returned label.
labels = [
    # general
    'related', 'request', 'offer',
    # aid
    'aid_related', 'medical_help', 'medical_products', 'search_and_rescue',
    'security', 'military', 'child_alone', 'water', 'food', 'shelter',
    'clothing', 'money', 'missing_people', 'refugees', 'death', 'other_aid',
    # infrastructure
    'infrastructure_related', 'transport', 'buildings', 'electricity',
    'tools', 'hospitals', 'shops', 'aid_centers', 'other_infrastructure',
    # weather
    'weather_related', 'floods', 'storm', 'fire', 'earthquake', 'cold',
    'other_weather',
    # source
    'direct_report',
]

# Load the classifier once at start-up so every request reuses it.
# NOTE: unpickling can execute arbitrary code; only load model files that
# were produced by a trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

@app.route('/get_intent_predictions', methods = ['POST'])
def get_prob():
    """Return the most likely category name for a posted tweet.

    Reads the ``tweettext`` form field, scores it with ``loaded_model`` and
    returns the entry of ``labels`` whose target has the highest value in the
    second probability column.

    Returns:
        The predicted label as a string. If anything fails (missing form
        field, model error, ...) the error is printed and ``"related"`` is
        returned as a best-effort fallback.
    """
    try:
        tweet = request.form['tweettext']
        # The original passed an undefined name here, so every request fell
        # into the except branch; the posted text is what must be scored.
        pred_probs = loaded_model.predict_proba([tweet])
        # One entry per target; row[0][1] is the first sample's second
        # probability column. argmax yields the target index.
        pred_prob = np.argmax([row[0][1] for row in pred_probs])
        preds = labels[pred_prob]
        print("PREDS-------",preds)
    except Exception as e:
        # Deliberate best-effort: never fail the request, fall back instead.
        print("Exception",e)
        preds = "related"

    return str(preds)


@app.route('/get_last_1mindata', methods = ['POST'])
def get_lastmindata():
    """Return recently indexed tweets as a JSON array of records.

    Queries the ``twitter_india_covid`` index for documents whose
    ``@timestamp`` lies within the last 15 minutes (UTC). Note that the
    route name says "1min" while the window used here is 15 minutes.

    No ``size`` is passed, so the number of hits is whatever the
    Elasticsearch default page size allows.

    Returns:
        JSON string (``orient="records"``) built from each hit's ``_source``.
    """
    window_start = datetime.utcnow() - timedelta(minutes=15)
    window_end = datetime.utcnow()
    es_query = {
        "query": {
            "range": {
                "@timestamp": {"gte": window_start, "lt": window_end}
            }
        }
    }
    result = es.search(index="twitter_india_covid", body=es_query)

    sources = [hit['_source'] for hit in result['hits']['hits']]
    return pd.DataFrame(sources).to_json(orient="records")

@app.route('/get_fulldata', methods = ['POST'])
def get_fulldata():
    """Return every document of the ``twitter_india_covid`` index as JSON.

    Iterates the whole index with ``Search.scan()`` and serialises the
    documents as a JSON array of records.
    """
    scanner = Search(using=es, index="twitter_india_covid").scan()
    records = [hit.to_dict() for hit in scanner]
    return pd.DataFrame(records).to_json(orient="records")
                      

@app.route('/semantic_search', methods = ['POST'])
def search():
    """Run a ``query_string`` search over the indexed tweets.

    Reads the ``userquery`` form field, asks Elasticsearch for up to 30
    matches in ``twitter_india_covid`` and returns them as a JSON array of
    records with two fields: ``results`` (the hit score capped at 18 and
    divided by 18) and ``Text`` (the tweet text).
    """
    user_query = str(request.form['userquery'])
    es_request = {
        "size": 30,
        "query": {"query_string": {"query": user_query}},
    }
    hits = es.search(index="twitter_india_covid", body=es_request)["hits"]["hits"]

    rows = []
    for hit in hits:
        doc = hit["_source"]
        print(doc)
        # Cap the raw score at 18 and scale it by that cap.
        scaled_score = min(hit["_score"], 18) / 18
        rows.append((scaled_score, doc["text"]))

    frame = pd.DataFrame(rows, columns=['results', 'Text'])
    return frame.to_json(orient="records")


# Start the Flask server on port 5009 only when executed as a script.
if __name__ == '__main__':
    app.run(port=5009)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5009/ (Press CTRL+C to quit)
127.0.0.1 - - [14/May/2021 13:53:15] "[37mPOST /get_last_1mindata HTTP/1.1[0m" 200 -
