In [6]:
from gensim.models import KeyedVectors
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

model_path = 'GoogleNews-vectors-negative300.bin'
model = KeyedVectors.load_word2vec_format(model_path, binary=True)

def get_vector(phrase, model):
    words = phrase.split()
    word_vectors = [model[word] for word in words if word in model]
    if word_vectors:
        return np.mean(word_vectors, axis=0)
    else:
        return None

def find_closest(word_list, new_word, model):
    new_word_vector = get_vector(new_word, model)
    if new_word_vector is None:
        return None, None
    
    max_similarity = -1
    closest_word = None
    for word in word_list:
        word_vector = get_vector(word, model)
        if word_vector is not None:
            similarity = cosine_similarity([new_word_vector], [word_vector])[0][0]
            if similarity > max_similarity:
                max_similarity = similarity
                closest_word = word
    
    return closest_word, max_similarity

# Example usage
word_list = ['Partly Cloudy', 'Mostly Cloudy', 'Overcast', 'Foggy', 'Breezy and Mostly Cloudy', 'Clear',
    'Breezy and Partly Cloudy', 'Breezy and Overcast', 'Humid and Mostly Cloudy', 'Humid and Partly Cloudy',
    'Windy and Foggy', 'Windy and Overcast', 'Breezy and Foggy', 'Windy and Partly Cloudy', 'Breezy',
    'Dry and Partly Cloudy', 'Windy and Mostly Cloudy', 'Dangerously Windy and Partly Cloudy', 'Dry',
    'Windy', 'Humid and Overcast', 'Light Rain', 'Drizzle', 'Windy and Dry', 'Dry and Mostly Cloudy',
    'Breezy and Dry', 'Rain']
wordi=['snow']

new_word = "snowing"
closest_word, similarity = find_closest(wordi, new_word, model)
if closest_word:
    print(f"The closest word to '{new_word}' is '{closest_word}' with a similarity of {similarity:.4f}")
else:
    print(f"No close word found for '{new_word}'")


The closest word to 'snowing' is 'snow' with a similarity of 0.6486


In [16]:
import gensim.downloader as api
from gensim.models import KeyedVectors
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from typing import List, Set

# Downloading the Word2Vec model
model_path = 'GoogleNews-vectors-negative300.bin'
model = KeyedVectors.load_word2vec_format(model_path, binary=True)

# Define weather-related words
weather_related_words: Set[str] = {
    "sun", "rain", "cloud", "storm", "snow", "hot", "cold", "summer", "autumn", "avalanche", "breeze", "cloudburst",
    "cloud", "cold", "wave", "humid", "dry", "wind", "little", "and", "moderate", "cyclone", "dew", "downburst",
    "downwind", "drizzle", "storm", "dust", "sunlight", "fall", "fair", "flood", "fog", "freeze", "frost", "cloud",
    "heat", "hurricane", "chill", "lightning", "mist", "overcast", "shower", "thunder", "warm"
}

# Define stop words and negations
stop_words: Set[str] = set(stopwords.words('english'))
negations: Set[str] = {"not", "no", "never", "none"}

# List of words/phrases to compare with
comparison_list: List[str] = [
    'Partly Cloudy', 'Mostly Cloudy', 'Overcast', 'Foggy', 'Breezy and Mostly Cloudy', 'Clear',
    'Breezy and Partly Cloudy', 'Breezy and Overcast', 'Humid and Mostly Cloudy', 'Humid and Partly Cloudy',
    'Windy and Foggy', 'Windy and Overcast', 'Breezy and Foggy', 'Windy and Partly Cloudy', 'Breezy',
    'Dry and Partly Cloudy', 'Windy and Mostly Cloudy', 'Dangerously Windy and Partly Cloudy', 'Dry',
    'Windy', 'Humid and Overcast', 'Light Rain', 'Drizzle', 'Windy and Dry', 'Dry and Mostly Cloudy',
    'Breezy and Dry', 'Rain'
]

# Function to extract phrases
def extract_weather_phrases(sentence: str) -> List[str]:
    words = word_tokenize(sentence)
    phrases = []
    current_phrase = []
    in_phrase = False

    for word in words:
        if word in weather_related_words or word in stop_words or word in negations:
            current_phrase.append(word)
            in_phrase = True
        else:
            if in_phrase:
                phrases.append(" ".join(current_phrase))
                current_phrase = []
                in_phrase = False
    if in_phrase:
        phrases.append(" ".join(current_phrase))

    return phrases

# Function to replace phrases in the sentence
def replace_phrases(sentence: str, phrases: List[str]) -> str:
    for phrase in phrases:
        for comparison in comparison_list:
            try:
                similarity = model.similarity(phrase, comparison)
                if similarity > 0.5:
                    sentence = sentence.replace(phrase, comparison)
                    break
            except KeyError:
                # Handle the case where the phrase or comparison word is not in the vocabulary
                continue
    return sentence

# Main function to process the sentence
def process_sentence(sentence: str) -> str:
    phrases = extract_weather_phrases(sentence)
    updated_sentence = replace_phrases(sentence, phrases)
    return updated_sentence

# Example usage
sentence = "It is going to rain and it might snow later but the weather is unpredictable."
updated_sentence = process_sentence(sentence)
print(updated_sentence)


It is going to rain and it might snow later but the weather is unpredictable.


In [14]:
# Import required libraries
import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Function to extract conditions from a sentence
def extract_conditions(sentence):
    # Parse the sentence using the NLP model
    doc = nlp(sentence)
    
    # Initialize a list to hold condition phrases
    conditions = []
    
    # Traverse the dependency tree to find condition indicators
    for token in doc:
        if token.text.lower() in {"when", "if"}:
            # Get the subtree of the token representing the condition
            condition_phrase = [token.text]
            for right in token.rights:
                condition_phrase.append(right.text)
                condition_phrase.extend([t.text for t in right.subtree if t != right])
            conditions.append(" ".join(condition_phrase))
    
    # Return the list of condition phrases
    return conditions

# Example usage
sentence = "Give temperature when it's hot and humid."
conditions = extract_conditions(sentence)
print(conditions)


['when']


In [21]:
import re

def reformat_sentence(sentence):
    # Define patterns to identify "what" and "condition"
    what_pattern = re.compile(r"(what(?: will| is| are| can be)?|find|give|get|retrieve)?\s*([^,]+?)\s*(?:when|if|given|because|due to|who|that|be)", re.IGNORECASE)
    condition_pattern = re.compile(r"(?:when|if|given|because|due to|who|that|be)\s+(.+)$", re.IGNORECASE)

    # Search for patterns in the sentence
    what_match = what_pattern.search(sentence)
    condition_match = condition_pattern.search(sentence)
    
    # Extract matches
    what = what_match.group(2).strip() if what_match else "unknown"
    condition = condition_match.group(1).strip() if condition_match else "unknown condition"
    
    # Format the output
    formatted_sentence = f"give me {what} given {condition}"
    
    return formatted_sentence

# Example usage
input_sentence = "humidity and wind speed if the weather is cold but not humid and temperature is 15.2"
formatted_sentence = reformat_sentence(input_sentence)
print(formatted_sentence)


give me humidity and wind speed given the weather is cold but not humid and temperature is 15.2
