In [1]:
# Create the words CSV file

In [2]:
# Read the pictures
# Use Google Vision to label each picture
# Build the CSV file from the labels
# Save the file

In [31]:
import pandas as pd
from google.cloud import vision
from google.cloud.vision import types
from google.oauth2 import service_account
import os
import io
import json
from google.protobuf.json_format import MessageToDict
import time
from sklearn.pipeline import Pipeline


import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [32]:
# Directory whose files are listed and sent to Google Vision for labelling.
IMAGES_DIR = './Rubbled houses/'
# JSON config file; its KEYS -> GOOGLE_SERVICE_KEY entry is used to load the service-account credentials.
CONFIG_FILE = './config.json'
# CSV that receives the Vision labels and is read back when the model is trained.
WORDS_FILE = 'words_from_google_vision_1.csv'

In [33]:
# Read the service-account key path from the JSON config.
# Plain `json` is used rather than a DataFrame parser so the nested value is
# returned exactly as written (no dtype inference, no tabular-shape requirement).
with open(CONFIG_FILE, encoding="utf-8") as config_fh:
    GOOGLE_SERVICE_KEY = json.load(config_fh)["KEYS"]["GOOGLE_SERVICE_KEY"]

# Build the Vision client used by the label-detection helpers in this notebook.
credentials = service_account.Credentials.from_service_account_file(GOOGLE_SERVICE_KEY)
client = vision.ImageAnnotatorClient(credentials=credentials)

In [34]:
def get_google_vision_words(file_name):
    """Run Google Vision label detection on a local image file.

    Parameters
    ----------
    file_name : str
        Path of the image to read from disk.

    Returns
    -------
    dict
        The label-detection response converted with ``MessageToDict``.
    """
    # Pull the raw image bytes off disk.
    with io.open(file_name, 'rb') as fh:
        raw_bytes = fh.read()

    annotations = client.label_detection(
        image=types.Image(content=raw_bytes),
        max_results=100,
    )
    # Pause 5 seconds after every request -- presumably to throttle calls to
    # the API; confirm before changing.
    time.sleep(5)

    return MessageToDict(annotations)

In [35]:
def get_labels(e, res, col_no):
    """Pair an image identifier with the label descriptions found in a Vision response.

    Parameters
    ----------
    e : object
        Identifier of the image; returned unchanged as the first tuple item.
    res : dict
        Dict form of the label-detection response. Labels are read from its
        ``"labelAnnotations"`` list, one ``"description"`` per entry.
    col_no : object
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(e, descriptions)`` where ``descriptions`` is a list of label
        strings, empty when the response carries no label annotations.
    """
    # A response without any labels has no "labelAnnotations" key at all
    # (empty repeated fields are dropped when the message is converted to a
    # dict), so fall back to an empty list instead of raising KeyError.
    annotations = res.get("labelAnnotations", [])
    return (e, [annotation["description"] for annotation in annotations])

In [36]:
def create_google_words_csv():
    """Label every file in IMAGES_DIR with Google Vision and save the result to WORDS_FILE.

    Each file is sent to ``get_google_vision_words``; the file name and its
    list of label descriptions become one row of the CSV. Entries of
    IMAGES_DIR that are not regular files (e.g. sub-directories) are skipped.

    NOTE(review): the CSV is written without column names, while
    ``get_estimator`` reads "Words" and "TRUE" columns from the same file --
    presumably the file is renamed/labelled by hand in between; confirm.
    """
    label_d_lst = []

    for i, img in enumerate(os.listdir(IMAGES_DIR)):
        # os.path.join works whether or not IMAGES_DIR ends with a separator.
        img_path = os.path.join(IMAGES_DIR, img)
        if not os.path.isfile(img_path):
            # A directory cannot be opened as an image; skip it rather than crash.
            continue

        print("\nReading  : {}".format(img))
        res = get_google_vision_words(img_path)
        print("Got from vision  : {}".format(img))
        label_d_lst.append(get_labels(img, res, i))
        print("Done : {}".format(img))

    pd.DataFrame(label_d_lst).to_csv(WORDS_FILE)
    print("{} saved".format(WORDS_FILE))

In [37]:
#######################

In [38]:
## Read the words file and build a model

In [39]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

def get_estimator():
    """Fit a bag-of-words classifier on WORDS_FILE and return the best pipeline.

    The CSV is expected to contain an "Unnamed: 0" column (dropped), a "Words"
    column (model input) and a "TRUE" column (target). A stratified 80/20
    split is made with a fixed seed and only the training part is used for a
    5-fold grid search over the vectorizer's n-gram range, scored by accuracy.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The ``best_estimator_`` of the fitted grid search.
    """
    labelled = pd.read_csv(WORDS_FILE).drop(columns="Unnamed: 0")

    # The held-out 20% is not used in this function; only the training part
    # feeds the grid search.
    words_train, _, target_train, _ = train_test_split(
        labelled["Words"],
        labelled["TRUE"],
        shuffle=True,
        stratify=labelled["TRUE"],
        test_size=0.20,
        random_state=42,
    )

    text_clf = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('classifier', LogisticRegressionCV(random_state=42)),
    ])
    # Candidate n-gram ranges: (1, 1) through (1, 4).
    ngram_grid = {'vectorizer__ngram_range': [(1, upper) for upper in range(1, 5)]}

    search = GridSearchCV(text_clf, param_grid=ngram_grid, cv=5, scoring="accuracy")
    search.fit(words_train, target_train)
    return search.best_estimator_

In [40]:
#######################

In [41]:
## Read the damage photo
## Send it to Google Vision and get the label words
## Then predict with the trained model

In [42]:
# URL of the photo that is classified at the end of the notebook.
# NOTE(review): the query string embeds an access token -- confirm it is safe to keep in a shared notebook.
image = 'https://firebasestorage.googleapis.com/v0/b/fema-damage-report.appspot.com/o/images%2F1582004151708_15_25.jpg?alt=media&token=1c4eb4d0-c631-4159-9237-8d283cf23890'

In [43]:
from PIL import Image
import requests
from io import BytesIO

def get_image_google_vision_words(url, timeout=30):
    """Download an image over HTTP and run Google Vision label detection on it.

    Parameters
    ----------
    url : str
        Address of the image to download.
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get`` so a stalled server
        cannot hang the notebook. Defaults to 30.

    Returns
    -------
    dict
        The label-detection response converted with ``MessageToDict``.

    Raises
    ------
    requests.exceptions.HTTPError
        If the download answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.
    """
    response_img = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of forwarding the error page to
    # Vision as if it were image bytes.
    response_img.raise_for_status()

    image = types.Image(content=response_img.content)
    response = client.label_detection(image=image, max_results=100)
    # Pause 5 seconds after the request -- presumably API throttling carried
    # over from the batch helper; confirm before removing.
    time.sleep(5)

    return MessageToDict(response)

In [44]:
def get_prediction(image_url, model=None):
    """Predict the class of the image at ``image_url`` from its Vision labels.

    Parameters
    ----------
    image_url : str
        Address of the image to classify.
    model : fitted estimator, optional
        Object with a ``predict`` method that accepts a Series of strings.
        When omitted, a model is trained with ``get_estimator()`` (the
        original behaviour). Pass an already fitted estimator to avoid
        re-running the grid search on every prediction.

    Returns
    -------
    The result of ``model.predict`` for the single image (one prediction).
    """
    if model is None:
        model = get_estimator()

    res = get_image_google_vision_words(image_url)
    df = pd.DataFrame([get_labels(image_url, res, 0)], columns=["url", "Words"])
    # The label list is turned into its string form before prediction --
    # presumably to match how the lists look in the training CSV; confirm.
    df["Words"] = df["Words"].astype(str)

    return model.predict(df["Words"])

In [46]:
# Classify the sample photo. This call trains the model and queries Google Vision, so it is slow.
val = get_prediction(image)



In [49]:
# Show the prediction for the single photo as a string.
str(val[0])

'1'