# Chunk Finder for Meaningful Forms

## Imports 

In [1]:
import collections
from pathlib import Path

import numpy as np
import pandas as pd
import spacy
from spacy import displacy
from spacy.matcher import PhraseMatcher
import textacy

pd.set_option("display.max_rows", 600)
pd.set_option("display.max_columns", 500)
pd.set_option("max_colwidth", 400)


spacy.require_gpu()
spacy.prefer_gpu()
nlp = spacy.load("en_core_web_trf")


In [12]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import pandas as pd

embedder = SentenceTransformer('all-mpnet-base-v2')
pd_cluster = pd.read_csv('D:\\NLP\\Frame_NLP\\archive\\Sent_Cluster\\labeled_clusters99.csv')

## Small Cluster Sentence Similarity Checker

In [None]:
# Corpus with example sentences
corpus_sents = pd_cluster[pd_cluster['category']=='reports_case_coronavirus_tally']

corpus_sents.head()

In [None]:
corpus = corpus_sents['Processed_Title'].tolist()
corpus[:20]

In [None]:
corpus_embeddings = embedder.encode(corpus)

In [None]:
# Normalize the embeddings to unit length
corpus_embeddings = corpus_embeddings /  np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

from sklearn.metrics.pairwise import cosine_similarity

cosine_similarity(
    [corpus_embeddings[0]],
    corpus_embeddings[1:]
)


## 1229 Categories Clustering

In [22]:
pd_category = pd.read_csv('D:\\NLP\\Frame_NLP\\archive\\Sent_Cluster\\label_freq.csv')

In [23]:
column_to_remove = ['label']
corpus_categories = pd_category[column_to_remove].replace({'_':' '}, regex=True)
corpus_categories[:10]

Unnamed: 0,label
0,coronavirus coronavirus case
1,reports case coronavirus death
2,confirms case coronavirus
3,reports case coronavirus
4,coronavirus market
5,race vaccine coronavirus
6,coronavirus pinch holiday
7,coronavirus lockdown
8,coronavirus transmission glance
9,canceled fear coronavirus concern


In [25]:
cat_corpus = corpus_categories['label'].tolist()
cat_corpus[:10]

['coronavirus coronavirus case',
 'reports case coronavirus death',
 'confirms case coronavirus',
 'reports case coronavirus',
 'coronavirus market',
 'race vaccine coronavirus',
 'coronavirus pinch holiday',
 'coronavirus lockdown',
 'coronavirus transmission glance',
 'canceled fear coronavirus concern']

In [26]:
corpus_embeddings = embedder.encode(cat_corpus)

In [41]:
# Perform kmean clustering
clustering_model = AgglomerativeClustering(n_clusters=None, distance_threshold=1.5) #, affinity='cosine', linkage='average', distance_threshold=0.4)
clustering_model.fit(corpus_embeddings)
cluster_assignment = clustering_model.labels_

clustered_sentences = {}
for sentence_id, cluster_id in enumerate(cluster_assignment):
    if cluster_id not in clustered_sentences:
        clustered_sentences[cluster_id] = []

    clustered_sentences[cluster_id].append(cat_corpus[sentence_id])

f = open('D://NLP//Frame_NLP//archive//Sent_Cluster//coronavirus_Frames.txt', 'w', encoding='utf-8')
#f = open('D://NLP//Frame_NLP//archive//Sent_Cluster//coronavirus_frames.csv', 'w', encoding='utf-8')

for i, cluster in clustered_sentences.items():

    print("Cluster {} \t\t {}".format(i+1, cluster), file=f)
    #print("Cluster %(n)s \t\t %(b)s" % {'n': i+1, 'b': cluster}, file=f)

f.close()

In [43]:
category_clusters = pd.read_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//coronavirus_Frames.txt', sep='\t\t', encoding='utf-8')
category_clusters.head()
category_clusters.columns.values[0] = "Cluster Number"
category_clusters.columns.values[1] = "List of Frame Aspects"

display(category_clusters)

Unnamed: 0,Cluster 90,"['coronavirus coronavirus case', 'cases case coronavirus', 'came infections coronavirus case', 'coronavirus epidemic case', 'coronavirus thrombus case', 'case case coronavirus', 'suggests infection coronavirus case', 'case case coronavirus health', 'case case coronavirus 1st', 'cases case coronavirus ministry', 'isolated member coronavirus case', 'coronavirus case', 'crosses cases coronavirus', 'turns coronavirus case', 'sees case coronavirus', 'case coronavirus', 'cases coronavirus case', 'cases coronavirus infection', 'cases coronavirus patient', 'cases 10,000 coronavirus case', 'arrived cases coronavirus carrier', 'cases coronavirus', 'cases coronavirus investigation']"
0,Cluster 86,"['reports case coronavirus death', 'dies victim coronavirus doctor', 'reports death coronavirus', 'confirms death coronavirus', 'reaches official death coronavirus', 'reports death coronavirus doctor', 'announces death case coronavirus', 'dies covid person coronavirus', 'confirms case coronavirus death', 'reports death coronavirus case', 'reports death coronavirus person', 'cleared death coro..."
1,Cluster 60,"['confirms case coronavirus', 'confirms case coronavirus governor', 'confirmed case coronavirus', 'confirmed coronavirus case', 'confirms case coronavirus police', 'confirms case coronavirus chief', 'confirms case coronavirus month', 'confirms case coronavirus report', 'confirms case president coronavirus', 'confirmed case coronavirus virus', 'confirms case coronavirus minister', 'confirms ca..."
2,Cluster 14,"['reports case coronavirus', 'reports loss case coronavirus', 'reports spike case coronavirus', 'reports case coronavirus tally', 'newsletter hospitalizations coronavirus outbreak', 'reported news coronavirus case', 'reported case coronavirus', 'reports rise coronavirus death', 'records death coronavirus patient', 'reports case coronavirus hospital', 'records case coronavirus 13th', 'records ..."
3,Cluster 78,"['coronavirus market', 'coronavirus economy', 'coronavirus growth economy outbreak', 'coronavirus recession economy', 'says depression coronavirus economy', 'warns recession coronavirus', 'crisis economy coronavirus lockdown', 'coronavirus potential case economy', 'lawmaker coronavirus economy factor']"
4,Cluster 38,"['race vaccine coronavirus', 'coronavirus authoritarianism opinion', 'speak negros coronavirus racism', 'cancels race coronavirus outbreak', 'coronavirus minority', 'races surge case coronavirus', 'discriminate coronavirus supremacist', 'infected racism coronavirus opinion', 'use coronavirus racism excuse', 'stop spread racism coronavirus']"


In [108]:
df = pd.DataFrame()

cluster_dic = {}

for i, cluster in clustered_sentences.items():
    cluster_dic = {**cluster_dic, **{"Cluster {}".format(i+1): cluster}}
    
df = pd.DataFrame.from_dict(cluster_dic, orient = 'index')
df = df.T
df = df.replace({' ':'_'}, regex=True)

df.to_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//coronavirus_Frames.csv')

In [110]:
Frames = pd.read_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//coronavirus_Frames.csv', index_col=0)
Frames.head(50)

Unnamed: 0,Cluster 90,Cluster 86,Cluster 60,Cluster 14,Cluster 78,Cluster 38,Cluster 19,Cluster 98,Cluster 7,Cluster 27,Cluster 56,Cluster 63,Cluster 22,Cluster 12,Cluster 46,Cluster 26,Cluster 8,Cluster 37,Cluster 68,Cluster 49,Cluster 71,Cluster 33,Cluster 29,Cluster 100,Cluster 5,Cluster 24,Cluster 28,Cluster 84,Cluster 85,Cluster 16,Cluster 54,Cluster 13,Cluster 53,Cluster 25,Cluster 15,Cluster 74,Cluster 20,Cluster 82,Cluster 94,Cluster 42,Cluster 51,Cluster 59,Cluster 1,Cluster 99,Cluster 91,Cluster 58,Cluster 72,Cluster 43,Cluster 93,Cluster 101,Cluster 3,Cluster 39,Cluster 30,Cluster 31,Cluster 57,Cluster 52,Cluster 79,Cluster 64,Cluster 73,Cluster 66,Cluster 17,Cluster 76,Cluster 96,Cluster 32,Cluster 45,Cluster 67,Cluster 77,Cluster 23,Cluster 50,Cluster 88,Cluster 36,Cluster 70,Cluster 34,Cluster 48,Cluster 10,Cluster 18,Cluster 69,Cluster 35,Cluster 83,Cluster 21,Cluster 44,Cluster 92,Cluster 80,Cluster 55,Cluster 47,Cluster 6,Cluster 89,Cluster 2,Cluster 61,Cluster 95,Cluster 41,Cluster 40,Cluster 4,Cluster 81,Cluster 97,Cluster 11,Cluster 75,Cluster 62,Cluster 87,Cluster 9,Cluster 65
0,coronavirus_coronavirus_case,reports_case_coronavirus_death,confirms_case_coronavirus,reports_case_coronavirus,coronavirus_market,race_vaccine_coronavirus,coronavirus_pinch_holiday,coronavirus_lockdown,coronavirus_transmission_glance,canceled_fear_coronavirus_concern,closes_school_coronavirus,urges_people_coronavirus_scare,coronavirus_coronavirus_update_look,test_immunity_coronavirus_antibody,coronavirus_restriction_patient,coronavirus_event_sport,coronavirus_coronavirus_cure,looms_disinformation_coronavirus_misinformation,declares_emergency_coronavirus,coronavirus_nightmare_prayer,got_case_coronavirus_jail,system_result_coronavirus_state,surges_case_coronavirus,coronavirus_ward_hospital,coronavirus_coronavirus_player,says_risk_coronavirus_airline,reaches_112_coronavirus_death,rises_coronavirus_death,coronavirus_thing_home_care,coronavirus_development_nrl,enforce_measure_coronavirus_government,changing_disruption_coronavirus_business,passes_bill_coronavirus_relief,recovered_recovery_coronavirus_case,says_research_coronavirus_hacker,coronavirus_coronavirus_quarantine,closes_border_coronavirus,coronavirus_positive_ship_cruise,plunge_sell_market_coronavirus,coronavirus_dad_librarian,tests_negative_coronavirus_doctor,closed_facility_coronavirus_tourist,spread_coronavirus_dog_cat,coronavirus_update_case,approves_test_coronavirus,voted_hotspot_coronavirus_case,tests_positive_coronavirus_symptom,coronavirus_life_outbreak,says_return_coronavirus_job,tests_positive_owner_knick,coronavirus_mass_church,worried_shot_coronavirus_flu,pence_member_coronavirus_fight,latest_cases_death_case,"coronavirus_6,000_case",coronavirus_ventilator,think_misinformation_coronavirus_beer,says_coronavirus_patient,extend_measure_coronavirus_case,coronavirus_mask_pandemic,halts_warranty_coronavirus_day,handles_shopper_coronavirus_staff,grow_coronavirus_scientist_lab,coronavirus_coronavirus_risk,coronavirus_movie,spend_fund_coronavirus_response,ready_stimulus_coronavirus_temper,seek_hand_coronavirus_govts,signs_package_coronavirus_emergency,builds_awareness_coronavirus,updates_beaches_coronavirus_pandemic,tells_china_coronavirus_outbreak,report_app_coronavirus,confirmed_amvca_case_isolation,coronavirus_self_victim,pizzeria_customer_coronavirus_dining,declares_curfew_coronavirus_lockdown,says_response_whos_curve,coronavirus_uk_travel_ban,play_role_woman_parent,podcast_podcast_time_houseparty,aged_coronavirus_woman,hospitalized_president_wife,dies_ambassador_leader_aide,protocol_plan_instruction_disease,suffer_blow_etfs_opportunity,timeline_sport_response,rises_9692_death_toll,updates_update_world,underlines_need_effort_govt,tightens_case_tourist_emergency,desantis_millions_case_state,announces_recovery_patient_death,rises_number_case,reports_spike_view_industry,help_game_developer,fall_price_oil,update_dw_birthday,waives_fee_coronavirus_consumer,bet_opinion_cure,news_ship_cruise
1,cases_case_coronavirus,dies_victim_coronavirus_doctor,confirms_case_coronavirus_governor,reports_loss_case_coronavirus,coronavirus_economy,coronavirus_authoritarianism_opinion,cases_case_coronavirus_morning,coronavirus_restriction_lockdown,coronavirus_chains_effect,postponed_anniversary_coronavirus_fear,coronavirus_closure_school,coronavirus_weekend_panic,coronavirus_appearance_official,test_people_coronavirus,treated_limit_coronavirus_patient,coronavirus_exercise_case_week,coronavirus_coronavirus_disinfectant,claims_hunch_coronavirus_expert,declares_emergency_health,spread_positive_coronavirus_protest,test_coffin_coronavirus_inmate,reports_coronavirus_region,rise_candle_coronavirus_case,hospital_coronavirus_field,coronavirus_crisis_celebrity,suspends_flight_coronavirus_disease,number_coronavirus,rises_life_coronavirus_death,hits_home_nursing_coronavirus,coronavirus_adeboye_news,tightens_rule_coronavirus_crisis,help_recovery_coronavirus_firm,signs_bill_coronavirus_spending,coronavirus_discharge_patient,coronavirus_message_people,distancing_spread_coronavirus,closes_border_coronavirus_rail,coronavirus_season_cruise_passenger,leads_price_coronavirus_pandemic,coronavirus_child,tests_positive_coronavirus_exposure,coronavirus_hotel_quarantine,buddy_positive_dog_coronavirus,watch_update_governor_coronavirus,passes_trial_vaccine_coronavirus,expands_change_airport_coronavirus,adds_symptom_coronavirus,latest_coronavirus_spread,trickling_job_coronavirus_state,tests_positive_basketball_legend,answered_question_coronavirus,vaccine_vaccine_coronavirus,says_counterattack_coronavirus_trump,cases_record_case_death,coronavirus_population_pandemic,sending_ventilator_coronavirus_spot,surge_virus_beer,says_case_coronavirus_official,stock_rally_coronavirus_%,protect_mask_coronavirus_face,returns_boost_company_coronavirus,closes_factory_coronavirus_concern,regulator_coronavirus_fishery,photos_people_coronavirus_lung,robots_coronavirus_robot,sends_mourn_coronavirus_pandemic,helps_cold_coronavirus_allergy,launches_service_coronavirus_government,turbografx_production_coronavirus_mini,tips_coronavirus_mind,expo_paddlefest_pandemic_fear,coronavirus_wuhan_death,coronavirus_phone_time,accelerated_sparkle_warning_cava,coronavirus_name_victim,coronavirus_covid_food,closes_casinos_coronavirus_spread,says_appetite_consumerist_design,extends_flight_coronavirus_country,suggests_urine_cow_yagna,holds_briefing_press_task,coronavirus_birth_woman,symptoms_symptom_difference_nausea,dies_official_adviser,covid_notification_commentator_life,warns_impact_economy_trump,cancels_season_league_soccer,"updates_1,223_death_toll",updates_lockdown_case,proposes_fund_chieftain_government,reopen_australia_gym_crowd,spreading_disinformation_scammer_million,recover_patient_%,rises_case_number,role_intercity_bus,20,plunges_%_oil,trial_vaccine_month,waives_fee_coronavirus_indigo,shuts_school_outbreak,grows_case_cruise_ship
2,came_infections_coronavirus_case,reports_death_coronavirus,confirmed_case_coronavirus,reports_spike_case_coronavirus,coronavirus_growth_economy_outbreak,speak_negros_coronavirus_racism,opens_coronavirus_day,coronavirus_briefing_lockdown,confirmed_coronavirus_60,postponed_match_coronavirus_force,diagnosed_coronavirus_student,terrifies_coronavirus_awareness,seriously_outbreak_coronavirus_map,tests_test_coronavirus,denies_case_coronavirus_death,dies_coronavirus_wrestler,researchers_coronavirus_drug,warned_misinformation_coronavirus_report,declares_emergency_coronavirus_health,appeal_coronavirus_pm,tests_coronavirus_inmate,reports_case_coronavirus_state,rise_stimulus_coronavirus_case,admitted_symptom_coronavirus_hospital,celebrities_coronavirus_celebrity,resume_flight_coronavirus_airline,"pass_20,000_coronavirus_death",death_help_coronavirus_returnee,bungie_employees_home_coronavirus,reports_case_coronavirus_hour,suppress_content_g_coronavirus,faces_threat_coronavirus_shortage,reaches_deal_coronavirus_bill,recovers_coronavirus_patient,says_website_coronavirus_self,coronavirus_orphanage_prevention,coronavirus_deadline_subway,suspend_cruise_coronavirus_month,timing_growth_coronavirus_stock,confirms_coronavirus_daughter,tests_positive_coronavirus_officer,coronavirus_foreigner_hotel,coronavirus_transfusions_plasma,updates_coverage_coronavirus_pandemic,passes_package_coronavirus_stimulus,mandalay_rule_coronavirus_travel,symptoms_coronavirus_expert,coronavirus_world,coronavirus_employment_pandemic,require_test_quarantine_physician,speaks_crisis_coronavirus_bra,ready_breakthrough_coronavirus_vaccine,mission_bill_coronavirus_trump,person_case_resident,"coronavirus_100,000_pandemic",updates_ventilator_patient_isolation,refusing_cup_coronavirus,says_6mln_coronavirus_people,adopt_measure_coronavirus_end,coronavirus_passenger_mask,outlook_state_coronavirus_reopening,closes_store_coronavirus_week,production_production_coronavirus_concern,transmit_coronavirus_risk,watch_hall_coronavirus_town,sends_aid_coronavirus_fight,says_matter_coronavirus_climate,awaits_inhabitants_coronavirus_government,unveils_package_coronavirus_death,learn_limit_coronavirus_crisis,canceled_open_surfing_pandemic,peak_china_coronavirus_day,cracks_misinformation_coronavirus_app,finds_area_impact,coronavirus_migrant_immigrant_ice,dies_coronavirus_pizza,curfews_curfew_summer_coronavirus,found_sample_person,dies_country_coronavirus,buying_paper_toilet_mum,blamed_hearings_trial_delay,dies_coronavirus_woman,hospitalized_symptom_condition,dies_officer_hundred,care_medium_news_cycle,ways_security_impact,live_event_ea_esport,takes_toll_home,updates_number_case,drives_spending_government_level,alleged_riot_prison_rumor,uniting_misinformation_giant_company,released_rest_concern_home,rises_200,reports_intelligence,pm_video,,declares_day_prayer_follower,declares_relaxation_payment_premium,cure_bleach_follower_er,speak_passenger_cruise_couple
3,coronavirus_epidemic_case,confirms_death_coronavirus,confirmed_coronavirus_case,reports_case_coronavirus_tally,coronavirus_recession_economy,cancels_race_coronavirus_outbreak,case_bullet_coronavirus_day,impose_lockdown_coronavirus,confirms_human_coronavirus_transmission,postponed_tour_coronavirus_concern,close_business_coronavirus_school,coronavirus_rally_scare,coronavirus_outbreak_sign,find_test_coronavirus_testing,seeks_sanction_coronavirus_case,protects_coronavirus_football,says_infection_coronavirus_cure,goes_disinformation_coronavirus_search,declares_emergency_coronavirus_pandemic,gather_stay_coronavirus_hundred,coronavirus_guard_prison_riot,coronavirus_case_region,surge_rate_coronavirus_case,opens_patient_coronavirus_hospital,preparing_cut_coronavirus_game,cancels_flight_coronavirus_outbreak,dies_rate_coronavirus_worker,dies_coronavirus,catch_coronavirus_package_home,coronavirus_emergency_health,bans_outbreak_coronavirus_people,deal_deal_coronavirus_package,fails_vote_coronavirus_stimulus,detected_point_coronavirus_patient,warns_messaging_coronavirus_death,kept_coronavirus_isolation,ignored_coronavirus_time,"bars_6,000_coronavirus_cruise",stocks_check_coronavirus_stock,dies_response_coronavirus_father,tests_positive_coronavirus_breaking,coronavirus_apartment_hotel,india_outbreak_coronavirus_cardboard,update_case_coronavirus_infection,tested_waivers_coronavirus_student,suburbs_coronavirus_cluster,questions_action_coronavirus_symptom,coronavirus_advisory_outbreak,work_employee_coronavirus_week,,calls_klopp_coronavirus_chief,risks_vaccine_coronavirus,journalism_coverage_coronavirus_time,person_case,"approach_30,000_coronavirus_case",ventilators_ventilator_pandemic_aid,coronavirus_use_cup,says_national_coronavirus_quarantine,lists_measure_coronavirus_tip,coronavirus_300_face,canceled_crisis_coronavirus_auto,coronavirus_shop_takeaway,struggling_coronavirus_farmer,help_spread_coronavirus_lung,stallone_production_coronavirus_pandemic,seeks_donation_coronavirus_brief,rise_coronavirus_weather,committee_spread_coronavirus_govt,delayed_wrench_coronavirus_production,explainer_coronavirus_guide,forces_beach_weekend_coronavirus,allocates_yuan_coronavirus_combat,production_iphone_coronavirus,says_impact_ass_review,coronavirus_care_immigrant,coronavirus_pantry_food_meal,shut_nightclubs_coronavirus_lockdown,asks_expert_science_solidarity,says_restriction_coronavirus_travel,hold_key_alpaca_news,conducted_job_interview,positive_doctor_woman_coronavirus,,,provide_update_health_official,thanks_layoffs_college_deposit,,understate_toll_count_estimate,,,fits_contour_crisis_world,gates_warrior_billionaire_fight,colleges_update_official_action,,reports_passenger,created_website_year_student,,sees_drop_death_month,,warn_skin_rash,
4,coronavirus_thrombus_case,reaches_official_death_coronavirus,confirms_case_coronavirus_police,newsletter_hospitalizations_coronavirus_outbreak,says_depression_coronavirus_economy,coronavirus_minority,deaths_death_coronavirus_week,extends_lockdown_coronavirus,coronavirus_gap_man,postponed_trial_coronavirus_threat,closes_school_capital_coronavirus,anxiety_symptom_coronavirus,colouring_campaign_coronavirus_handling,says_test_coronavirus,trend_drop_coronavirus_case,skip_gym_coronavirus_pandemic,breakthrough_death_coronavirus_steroid,trust_testing_coronavirus_evidence,approves_declaration_disaster_coronavirus,urges_people_coronavirus_pm,tests_positive_coronavirus_prison,records_case_coronavirus_province,cases_coronavirus_ph,needs_bed_coronavirus_hospital,nominated_player_coronavirus_issue,diagnosed_coronavirus_flight,communities_measures_coronavirus_death,rises_death_coronavirus,writes_letter_coronavirus_household,coronavirus_report_apocalypse,blocks_icke_coronavirus_video,create_shortage_fear_coronavirus,spread_administration_coronavirus_bill,ways_coronavirus_doctor,stay_way_coronavirus_message,expand_facilities_coronavirus_quarantine,plunge_toll_coronavirus_export,stranded_kit_coronavirus_cruise,announces_million_coronavirus_stock,talk_time_child_coronavirus,tests_positive_coronavirus_opera,queensland_concern_coronavirus_hotel,cause_pressure_coronavirus_blood,updates_infection_coronavirus_symptom,tests_vaccine_coronavirus_president,suspends_eviction_coronavirus_city,symptoms_symptom_coronavirus_virus,know_world_coronavirus_virus,coronavirus_engineer_police,,coronavirus_response_thing,developed_vaccine_coronavirus_scientist,tells_coronavirus_president,news_case_official,exceed_million_coronavirus_case,,coronavirus_glass_queue_drinker,speaks_official_coronavirus_ministry,adopts_measure_coronavirus_outbreak,wearing_mask_people_head,outlook_329_coronavirus_case,coronavirus_supermarket_worker,announces_billion_coronavirus_farmer,spread_case_person_coronavirus,robots_doctor_coronavirus_debut,announces_funding_coronavirus_fund,bring_reprieve_coronavirus_weather,prevent_coronavirus_government,ramps_production_mask_coronavirus,advice_patient_self_coronavirus,unveils_campaign_coronavirus_drowning,coronavirus_wuhan_returnee,surgeries_appointment_coronavirus_phone,,kills_victim_coronavirus_staffer,coronavirus_list_virus_bacteria,extends_curfew_coronavirus_case,,restrictions_test_coronavirus_travel,,,tested_coronavirus_woman,,,launches_information_health_organization,urges_trump_border,,predicts_thousand_hundred_death,,,spotted_lockdown_area,latest_organizer_protester_business,extended_support_site_reversal,,,,,,,suspended_outbreak,
5,case_case_coronavirus,reports_death_coronavirus_doctor,confirms_case_coronavirus_chief,reported_news_coronavirus_case,warns_recession_coronavirus,races_surge_case_coronavirus,coronavirus_holiday_update_briefing,life_lockdown_coronavirus,lessons_gravity_coronavirus_outbreak,canceled_coronavirus_twitchcon,confirms_case_coronavirus_school,sends_jitters_coronavirus_hospital,paintings_awareness_coronavirus_theme,kits_test_coronavirus,exceed_authority_coronavirus_case,plans_esport_coronavirus_sporting,trials_coronavirus_chloroquine,struggling_detail_coronavirus_misinformation,approves_patient_coronavirus_emergency,commits_500_coronavirus_arena,dies_coronavirus_inmate,nears_death_state_coronavirus,rise_case_coronavirus,coronavirus_case_hospital,performance_music_coronavirus_shutdown,suspend_flights_coronavirus_curb,rises_37_coronavirus_death,rise_lockdown_coronavirus_death,help_coronavirus_household,launches_alert_coronavirus_search,bans_gathering_coronavirus_week,apologizes_check_coronavirus_cash,block_bill_coronavirus_stimulus,apologizes_coronavirus_doctor_question,heightened_awareness_scam_coronavirus,coronavirus_facility_quarantine,halts_expansion_coronavirus_wave,coronavirus_update_cruise,invest_portfolio_coronavirus_investing,coronavirus_7_parent,tests_positive_employee_coronavirus,coronavirus_hotel_tourist,report_scare_year_virus,update_level_coronavirus_emergency,senator_test_coronavirus,flee_hospital_coronavirus,symptoms_coronavirus_detail,timeline_outbreak_coronavirus,coronavirus_decision_recruitment,,calls_staff_coronavirus_panic,progress_coronavirus_vaccine,force_trump_coronavirus_task,grow_case_death,coronavirus_capacity_case,,,says_case_coronavirus_doctor,measures_vulnerable_coronavirus_bank,report_mask_coronavirus,canceled_coronavirus_update,gets_pop_coronavirus_shop,workers_coronavirus_plant,spread_coronavirus_air,close_hall_coronavirus_cinema,pandemic_ps100bn_coronavirus_pension,coronavirus_climate,letting_blood_coronavirus_politician,announces_package_coronavirus_case,advice_approach_coronavirus_health,protests_beach_coronavirus_crisis,angers_china_cartoon_coronavirus,prepares_app_coronavirus_self,,says_offer_coronavirus_immigrant,suffers_loss_coronavirus_poultry,delays_mosque_night_coronavirus,,study_entry_coronavirus_country,,,confirmed_coronavirus_woman,,,,shut_ending_trump_vow,,rises_authority_toll,,,,,shifted_patient_college,,,,,,,immunity,
6,suggests_infection_coronavirus_case,announces_death_case_coronavirus,confirms_case_coronavirus_month,reported_case_coronavirus,crisis_economy_coronavirus_lockdown,discriminate_coronavirus_supremacist,coronavirus_eid_home,allowed_lockdown_coronavirus_week,faults_datum_coronavirus_spread,cancels_convention_coronavirus_spike,tested_coronavirus_student,panic_verdict_coronavirus_outbreak,coronavirus_soup_cartoon,coronavirus_kit_self,twists_death_coronavirus_case,coronavirus_spain_football_team,gets_drug_coronavirus_world,removes_coronavirus_misinformation,coronavirus_state_emergency,wishes_recovery_coronavirus_spokesman,released_coronavirus_prison,win_coronavirus_state,confirms_case_coronavirus_ph,coronavirus_head_hospital,coronavirus_cut_player,screened_abayomi_coronavirus_passenger,"died_40,000_coronavirus_case",coronavirus_death_aid,owner_stores_coronavirus,coronavirus_case_scientist,bans_coronavirus_pandemic,refuses_offer_coronavirus_conspiracy,hits_roadblock_coronavirus_bill,joined_battle_coronavirus_force,facebook_misinformation_coronavirus_virus,people_couple_coronavirus_family,arrive_rotation_coronavirus_postpone,coronavirus_antibody_sailor,tumble_demand_stock_coronavirus,dies_family_tripura_coronavirus,tests_positive_coronavirus_test,,coronavirus_swab_talk,watch_update_coronavirus_governor,refused_test_coronavirus_kid,spread_coronavirus_bathroom,symptoms_sign_coronavirus_warning,coronavirus_run_pace,says_coronavirus_employee_report,,holds_meeting_coronavirus_mathura,snag_vaccine_coronavirus,charged_president_coronavirus_blogger,confirms_case,coronavirus_million_case,,,says_pandemic_coronavirus_expert,eases_measure_coronavirus_weekend,face_restriction_coronavirus_region,cancels_licence_coronavirus_tv,closes_store_coronavirus_outbreak,limited_plant_coronavirus_workplace,human_person_coronavirus,filming_change_coronavirus_concern,assures_public_coronavirus_donation,,postpones_coronavirus_year_government,coronavirus_hand_impact_developer,warned_fauci_coronavirus,,,,,coronavirus_suicide_man_floor,soup_strain_bat_coronavirus,,,coronavirus_ban_travel_outbreak,,,coronavirus_study_woman,,,,,,,,,,,,,,,,,,,
7,case_case_coronavirus_health,dies_covid_person_coronavirus,confirms_case_coronavirus_report,reports_rise_coronavirus_death,coronavirus_potential_case_economy,infected_racism_coronavirus_opinion,coronavirus_case_hour,coronavirus_lockdown_district,slows_peak_coronavirus_expert,cancels_sxsw_coronavirus_fear,coronavirus_work_home_school,responds_fear_coronavirus_concern,crafting_wishlist_coronavirus_package,increased_test_coronavirus_testing,stopped_nat'l_coronavirus_case,tracker_coronavirus_athlete,coronavirus_fight_drug,waging_disinformation_coronavirus_distraction,factbox_state_coronavirus_emergency,pm_pakistanis_coronavirus_today,freed_coronavirus_offender,shortened_state_coronavirus_home,rise_infection_coronavirus_update,hospitalized_coronavirus_trouble,cash_billion_contestant_coronavirus,suspends_flight_coronavirus_outbreak,"says_51,000_death_coronavirus",dies_coronavirus_candidate,dealing_normal_coronavirus_home,coronavirus_elite_epicentre,bans_ads_mask_coronavirus,increases_inventory_coronavirus_supply,,recovered_coronavirus_hospitalisation,warns_australians_coronavirus_lockdown,casuals_hundred_coronavirus_isolation,ticks_coronavirus_clock_observation,update_ship_coronavirus_thousand,feds_price_coronavirus_trader,dies_death_coronavirus_infant,tests_positive_coronavirus_marouane,,sneaks_sense_smell_coronavirus,updates_immunity_recovery_coronavirus,cleared_coronavirus_bowler,light_darkness_coronavirus_battle,_coronavirus_symptom,creates_spread_pandemic_coronavirus,coronavirus_building_employee,,attendees_coronavirus_conference,widening_poll_coronavirus_vaccine,meets_force_task_coronavirus,stays_case_number,reach_peak_coronavirus_infection,,,says_vatican_coronavirus_case,planning_grade_coronavirus_measure,buy_mask_face_coronavirus,refund_coronavirus_holiday,,says_production_coronavirus,coronavirus_health,,offers_reward_coronavirus_antidote,,start_smart_coronavirus_official,rolls_effort_coronavirus_package,guide_sex_coronavirus_outbreak,,,,,buried_victim_coronavirus,bethlehem_mall_coronavirus_restaurant,,,spread_coronavirus_country,,,transmit_coronavirus_woman,,,,,,,,,,,,,,,,,,,
8,case_case_coronavirus_1st,confirms_case_coronavirus_death,confirms_case_president_coronavirus,records_death_coronavirus_patient,lawmaker_coronavirus_economy_factor,use_coronavirus_racism_excuse,announces_case_coronavirus_day,eases_lockdown_coronavirus_confusion,tumbles_coronavirus_impact,cancels_tour_coronavirus_birthday,universities_situation_coronavirus_concern,spreads_fear_coronavirus_contagion,coronavirus_fear_blue_art,says_testing_coronavirus_test,reports_arrests_coronavirus_case,suspends_production_wrestler_coronavirus,declares_coronavirus_treatment,check_hoax_coronavirus_fact,declared_emergency_health_death,holds_parade_coronavirus_spread,daughter_inmate_coronavirus,outbreak_canadians_coronavirus,rises_upsurge_coronavirus_case,case_service_coronavirus_hospital,lottery_coronavirus_outbreak,coronavirus_indians_plane_today,"surpassed_2,000_coronavirus_death",dies_die_coronavirus,hits_home_pool_coronavirus,coronavirus_help_update_evening,warrant_mongering_coronavirus_pandemic,impacting_industry_coronavirus_tech,,positive_coronavirus_patient,evacuate_australians_coronavirus_scam,serving_doorway_coronavirus_risk,opened_floodgate_package_coronavirus,quarantine_sailor_coronavirus_navy,stocks_breather_coronavirus_stock,newborn_baby_coronavirus,midfielder_positive_coronavirus_test,,report_symptom_coronavirus_student,"updates_335,000_coronavirus_case",,caves_seating_coronavirus_travel,tests_symptom_woman_coronavirus,live_coronavirus_outbreak,tells_administration_pilot_coronavirus,,attended_seminar_coronavirus_doctor,working_dose_coronavirus_vaccine,faces_crisis_coronavirus_presidency,,surpasses_case_coronavirus_day,,,says_coronavirus_championship,know_coronavirus_%,trolled_mask_coronavirus_hen,postponed_coronavirus_console,,,man_coronavirus,,raise_euro_coronavirus_research,,cuomo_policy_coronavirus_nursing,,offers_caution_coronavirus_psychologist,,,,,poverty_despair_coronavirus_pandemic,eating_infection_coronavirus_scientist,,,coronavirus_restriction_cotton_travel,,,dies_life_woman_coronavirus,,,,,,,,,,,,,,,,,,,
9,cases_case_coronavirus_ministry,reports_death_coronavirus_case,confirmed_case_coronavirus_virus,reports_case_coronavirus_hospital,,stop_spread_racism_coronavirus,cases_weekend_coronavirus,break_advice_coronavirus_lockdown,disconnections_cap_coronavirus_carrier,cancelled_day_coronavirus_fear,coronavirus_coronavirus_intern_youngster,gets_boost_coronavirus_fear,screen_question_coronavirus,tests_coronavirus_test,arrests_news_coronavirus_post,postponed_fear_coronavirus_football,warns_coronavirus_drug,remove_theory_coronavirus_conspiracy,declares_state_coronavirus_emergency,advises_fan_coronavirus,cases_coronavirus_prison,depends_regions_today_coronavirus,surge_jump_case_coronavirus,admitted_inquiry_coronavirus_hospital,musician_coronavirus_patient,screened_kaadha_people_coronavirus,"rises_1,769_coronavirus_death",death_death_coronavirus,,coronavirus_news_wrap,suspended_selfie_coronavirus,simplify_money_coronavirus_thing,,celebrate_recovery_coronavirus_hospital,whydoweneedaking_online_coronavirus_healthtech,cdc_screening_coronavirus_quarantine,ease_restriction_coronavirus_recession,met_coronavirus_cruise_passenger,buy_pharmaceuticals_stock_coronavirus,worry_coronavirus_baby_child,tests_positive_host_coronavirus,,coronavirus_maharashtra_team_watch,updates_wfh_coronavirus_place,,team_attack_coronavirus_county,,look_office_coronavirus_pandemic,continue_worker_coronavirus_outbreak,,barb_aide_coronavirus_leader,tackle_ps46_coronavirus_research,dies_coronavirus_president,,"surpass_100,000_coronavirus_case",,,says_coronavirus_outbreak,,releases_mask_coronavirus_face,,,,stumble_case_coronavirus_health,,un_fund_coronavirus_ministry,,coronavirus_parliament_business,,encouraging_student_coronavirus_field,,,,,want_epidemic_ihc_step,,,,,,,,,,,,,,,,,,,,,,,,,,


In [92]:
Frames.shape

(53, 101)

## Frame Sub-Frame Corpus Maker

In [118]:
pd_all = pd.read_csv('D:\\NLP\\Frame_NLP\\archive\\Sent_Cluster\\labeled_clusters99.csv')
pd_all.head()

Unnamed: 0,Processed_Title,label_st1,label,category
0,"US coronavirus death toll rises to 14, most cases in Washington",85,reports_case_coronavirus_death,rises_37_coronavirus_death
1,"Iran rejects U.S. offer for coronavirus aid, cites conspiracy theory",17,recover_coronavirus_death,refuses_offer_coronavirus_conspiracy
2,TSA is making 3 changes to stop coronavirus from spreading on planes,-1,coronavirus_coronavirus_pandemic,expands_change_airport_coronavirus
3,NBA Players to Take Pay Cut Amidst Coronavirus Crisis,64,coronavirus_coronavirus_player,preparing_cut_coronavirus_game
4,"Coronavirus Cases confirmed in North Korea, say officials",21,vote_kit_coronavirus_case,coronavirus_exercise_case_week


In [114]:
category_clusters.iloc[0,1]

" ['reports case coronavirus death', 'dies victim coronavirus doctor', 'reports death coronavirus', 'confirms death coronavirus', 'reaches official death coronavirus', 'reports death coronavirus doctor', 'announces death case coronavirus', 'dies covid person coronavirus', 'confirms case coronavirus death', 'reports death coronavirus case', 'reports death coronavirus person', 'cleared death coronavirus news', 'confirmed coronavirus death', 'announced death coronavirus', 'dies coronavirus news', 'updated case coronavirus death', 'announces death coronavirus patient', 'stories talk coronavirus death', 'cases victim coronavirus death', 'reports death coronavirus life', 'confirms death coronavirus official', 'confirm death coronavirus health', 'dies coronavirus patient', 'confirms death coronavirus patient', 'died victim coronavirus patient']"

In [119]:
Frames.head(30)

Unnamed: 0,Cluster 90,Cluster 86,Cluster 60,Cluster 14,Cluster 78,Cluster 38,Cluster 19,Cluster 98,Cluster 7,Cluster 27,Cluster 56,Cluster 63,Cluster 22,Cluster 12,Cluster 46,Cluster 26,Cluster 8,Cluster 37,Cluster 68,Cluster 49,Cluster 71,Cluster 33,Cluster 29,Cluster 100,Cluster 5,Cluster 24,Cluster 28,Cluster 84,Cluster 85,Cluster 16,Cluster 54,Cluster 13,Cluster 53,Cluster 25,Cluster 15,Cluster 74,Cluster 20,Cluster 82,Cluster 94,Cluster 42,Cluster 51,Cluster 59,Cluster 1,Cluster 99,Cluster 91,Cluster 58,Cluster 72,Cluster 43,Cluster 93,Cluster 101,Cluster 3,Cluster 39,Cluster 30,Cluster 31,Cluster 57,Cluster 52,Cluster 79,Cluster 64,Cluster 73,Cluster 66,Cluster 17,Cluster 76,Cluster 96,Cluster 32,Cluster 45,Cluster 67,Cluster 77,Cluster 23,Cluster 50,Cluster 88,Cluster 36,Cluster 70,Cluster 34,Cluster 48,Cluster 10,Cluster 18,Cluster 69,Cluster 35,Cluster 83,Cluster 21,Cluster 44,Cluster 92,Cluster 80,Cluster 55,Cluster 47,Cluster 6,Cluster 89,Cluster 2,Cluster 61,Cluster 95,Cluster 41,Cluster 40,Cluster 4,Cluster 81,Cluster 97,Cluster 11,Cluster 75,Cluster 62,Cluster 87,Cluster 9,Cluster 65
0,coronavirus_coronavirus_case,reports_case_coronavirus_death,confirms_case_coronavirus,reports_case_coronavirus,coronavirus_market,race_vaccine_coronavirus,coronavirus_pinch_holiday,coronavirus_lockdown,coronavirus_transmission_glance,canceled_fear_coronavirus_concern,closes_school_coronavirus,urges_people_coronavirus_scare,coronavirus_coronavirus_update_look,test_immunity_coronavirus_antibody,coronavirus_restriction_patient,coronavirus_event_sport,coronavirus_coronavirus_cure,looms_disinformation_coronavirus_misinformation,declares_emergency_coronavirus,coronavirus_nightmare_prayer,got_case_coronavirus_jail,system_result_coronavirus_state,surges_case_coronavirus,coronavirus_ward_hospital,coronavirus_coronavirus_player,says_risk_coronavirus_airline,reaches_112_coronavirus_death,rises_coronavirus_death,coronavirus_thing_home_care,coronavirus_development_nrl,enforce_measure_coronavirus_government,changing_disruption_coronavirus_business,passes_bill_coronavirus_relief,recovered_recovery_coronavirus_case,says_research_coronavirus_hacker,coronavirus_coronavirus_quarantine,closes_border_coronavirus,coronavirus_positive_ship_cruise,plunge_sell_market_coronavirus,coronavirus_dad_librarian,tests_negative_coronavirus_doctor,closed_facility_coronavirus_tourist,spread_coronavirus_dog_cat,coronavirus_update_case,approves_test_coronavirus,voted_hotspot_coronavirus_case,tests_positive_coronavirus_symptom,coronavirus_life_outbreak,says_return_coronavirus_job,tests_positive_owner_knick,coronavirus_mass_church,worried_shot_coronavirus_flu,pence_member_coronavirus_fight,latest_cases_death_case,"coronavirus_6,000_case",coronavirus_ventilator,think_misinformation_coronavirus_beer,says_coronavirus_patient,extend_measure_coronavirus_case,coronavirus_mask_pandemic,halts_warranty_coronavirus_day,handles_shopper_coronavirus_staff,grow_coronavirus_scientist_lab,coronavirus_coronavirus_risk,coronavirus_movie,spend_fund_coronavirus_response,ready_stimulus_coronavirus_temper,seek_hand_coronavirus_govts,signs_package_coronavirus_emergency,builds_awareness_coronavirus,updates_beaches_coronavirus_pandemic,tells_china_coronavirus_outbreak,report_app_coronavirus,confirmed_amvca_case_isolation,coronavirus_self_victim,pizzeria_customer_coronavirus_dining,declares_curfew_coronavirus_lockdown,says_response_whos_curve,coronavirus_uk_travel_ban,play_role_woman_parent,podcast_podcast_time_houseparty,aged_coronavirus_woman,hospitalized_president_wife,dies_ambassador_leader_aide,protocol_plan_instruction_disease,suffer_blow_etfs_opportunity,timeline_sport_response,rises_9692_death_toll,updates_update_world,underlines_need_effort_govt,tightens_case_tourist_emergency,desantis_millions_case_state,announces_recovery_patient_death,rises_number_case,reports_spike_view_industry,help_game_developer,fall_price_oil,update_dw_birthday,waives_fee_coronavirus_consumer,bet_opinion_cure,news_ship_cruise
1,cases_case_coronavirus,dies_victim_coronavirus_doctor,confirms_case_coronavirus_governor,reports_loss_case_coronavirus,coronavirus_economy,coronavirus_authoritarianism_opinion,cases_case_coronavirus_morning,coronavirus_restriction_lockdown,coronavirus_chains_effect,postponed_anniversary_coronavirus_fear,coronavirus_closure_school,coronavirus_weekend_panic,coronavirus_appearance_official,test_people_coronavirus,treated_limit_coronavirus_patient,coronavirus_exercise_case_week,coronavirus_coronavirus_disinfectant,claims_hunch_coronavirus_expert,declares_emergency_health,spread_positive_coronavirus_protest,test_coffin_coronavirus_inmate,reports_coronavirus_region,rise_candle_coronavirus_case,hospital_coronavirus_field,coronavirus_crisis_celebrity,suspends_flight_coronavirus_disease,number_coronavirus,rises_life_coronavirus_death,hits_home_nursing_coronavirus,coronavirus_adeboye_news,tightens_rule_coronavirus_crisis,help_recovery_coronavirus_firm,signs_bill_coronavirus_spending,coronavirus_discharge_patient,coronavirus_message_people,distancing_spread_coronavirus,closes_border_coronavirus_rail,coronavirus_season_cruise_passenger,leads_price_coronavirus_pandemic,coronavirus_child,tests_positive_coronavirus_exposure,coronavirus_hotel_quarantine,buddy_positive_dog_coronavirus,watch_update_governor_coronavirus,passes_trial_vaccine_coronavirus,expands_change_airport_coronavirus,adds_symptom_coronavirus,latest_coronavirus_spread,trickling_job_coronavirus_state,tests_positive_basketball_legend,answered_question_coronavirus,vaccine_vaccine_coronavirus,says_counterattack_coronavirus_trump,cases_record_case_death,coronavirus_population_pandemic,sending_ventilator_coronavirus_spot,surge_virus_beer,says_case_coronavirus_official,stock_rally_coronavirus_%,protect_mask_coronavirus_face,returns_boost_company_coronavirus,closes_factory_coronavirus_concern,regulator_coronavirus_fishery,photos_people_coronavirus_lung,robots_coronavirus_robot,sends_mourn_coronavirus_pandemic,helps_cold_coronavirus_allergy,launches_service_coronavirus_government,turbografx_production_coronavirus_mini,tips_coronavirus_mind,expo_paddlefest_pandemic_fear,coronavirus_wuhan_death,coronavirus_phone_time,accelerated_sparkle_warning_cava,coronavirus_name_victim,coronavirus_covid_food,closes_casinos_coronavirus_spread,says_appetite_consumerist_design,extends_flight_coronavirus_country,suggests_urine_cow_yagna,holds_briefing_press_task,coronavirus_birth_woman,symptoms_symptom_difference_nausea,dies_official_adviser,covid_notification_commentator_life,warns_impact_economy_trump,cancels_season_league_soccer,"updates_1,223_death_toll",updates_lockdown_case,proposes_fund_chieftain_government,reopen_australia_gym_crowd,spreading_disinformation_scammer_million,recover_patient_%,rises_case_number,role_intercity_bus,20,plunges_%_oil,trial_vaccine_month,waives_fee_coronavirus_indigo,shuts_school_outbreak,grows_case_cruise_ship
2,came_infections_coronavirus_case,reports_death_coronavirus,confirmed_case_coronavirus,reports_spike_case_coronavirus,coronavirus_growth_economy_outbreak,speak_negros_coronavirus_racism,opens_coronavirus_day,coronavirus_briefing_lockdown,confirmed_coronavirus_60,postponed_match_coronavirus_force,diagnosed_coronavirus_student,terrifies_coronavirus_awareness,seriously_outbreak_coronavirus_map,tests_test_coronavirus,denies_case_coronavirus_death,dies_coronavirus_wrestler,researchers_coronavirus_drug,warned_misinformation_coronavirus_report,declares_emergency_coronavirus_health,appeal_coronavirus_pm,tests_coronavirus_inmate,reports_case_coronavirus_state,rise_stimulus_coronavirus_case,admitted_symptom_coronavirus_hospital,celebrities_coronavirus_celebrity,resume_flight_coronavirus_airline,"pass_20,000_coronavirus_death",death_help_coronavirus_returnee,bungie_employees_home_coronavirus,reports_case_coronavirus_hour,suppress_content_g_coronavirus,faces_threat_coronavirus_shortage,reaches_deal_coronavirus_bill,recovers_coronavirus_patient,says_website_coronavirus_self,coronavirus_orphanage_prevention,coronavirus_deadline_subway,suspend_cruise_coronavirus_month,timing_growth_coronavirus_stock,confirms_coronavirus_daughter,tests_positive_coronavirus_officer,coronavirus_foreigner_hotel,coronavirus_transfusions_plasma,updates_coverage_coronavirus_pandemic,passes_package_coronavirus_stimulus,mandalay_rule_coronavirus_travel,symptoms_coronavirus_expert,coronavirus_world,coronavirus_employment_pandemic,require_test_quarantine_physician,speaks_crisis_coronavirus_bra,ready_breakthrough_coronavirus_vaccine,mission_bill_coronavirus_trump,person_case_resident,"coronavirus_100,000_pandemic",updates_ventilator_patient_isolation,refusing_cup_coronavirus,says_6mln_coronavirus_people,adopt_measure_coronavirus_end,coronavirus_passenger_mask,outlook_state_coronavirus_reopening,closes_store_coronavirus_week,production_production_coronavirus_concern,transmit_coronavirus_risk,watch_hall_coronavirus_town,sends_aid_coronavirus_fight,says_matter_coronavirus_climate,awaits_inhabitants_coronavirus_government,unveils_package_coronavirus_death,learn_limit_coronavirus_crisis,canceled_open_surfing_pandemic,peak_china_coronavirus_day,cracks_misinformation_coronavirus_app,finds_area_impact,coronavirus_migrant_immigrant_ice,dies_coronavirus_pizza,curfews_curfew_summer_coronavirus,found_sample_person,dies_country_coronavirus,buying_paper_toilet_mum,blamed_hearings_trial_delay,dies_coronavirus_woman,hospitalized_symptom_condition,dies_officer_hundred,care_medium_news_cycle,ways_security_impact,live_event_ea_esport,takes_toll_home,updates_number_case,drives_spending_government_level,alleged_riot_prison_rumor,uniting_misinformation_giant_company,released_rest_concern_home,rises_200,reports_intelligence,pm_video,,declares_day_prayer_follower,declares_relaxation_payment_premium,cure_bleach_follower_er,speak_passenger_cruise_couple
3,coronavirus_epidemic_case,confirms_death_coronavirus,confirmed_coronavirus_case,reports_case_coronavirus_tally,coronavirus_recession_economy,cancels_race_coronavirus_outbreak,case_bullet_coronavirus_day,impose_lockdown_coronavirus,confirms_human_coronavirus_transmission,postponed_tour_coronavirus_concern,close_business_coronavirus_school,coronavirus_rally_scare,coronavirus_outbreak_sign,find_test_coronavirus_testing,seeks_sanction_coronavirus_case,protects_coronavirus_football,says_infection_coronavirus_cure,goes_disinformation_coronavirus_search,declares_emergency_coronavirus_pandemic,gather_stay_coronavirus_hundred,coronavirus_guard_prison_riot,coronavirus_case_region,surge_rate_coronavirus_case,opens_patient_coronavirus_hospital,preparing_cut_coronavirus_game,cancels_flight_coronavirus_outbreak,dies_rate_coronavirus_worker,dies_coronavirus,catch_coronavirus_package_home,coronavirus_emergency_health,bans_outbreak_coronavirus_people,deal_deal_coronavirus_package,fails_vote_coronavirus_stimulus,detected_point_coronavirus_patient,warns_messaging_coronavirus_death,kept_coronavirus_isolation,ignored_coronavirus_time,"bars_6,000_coronavirus_cruise",stocks_check_coronavirus_stock,dies_response_coronavirus_father,tests_positive_coronavirus_breaking,coronavirus_apartment_hotel,india_outbreak_coronavirus_cardboard,update_case_coronavirus_infection,tested_waivers_coronavirus_student,suburbs_coronavirus_cluster,questions_action_coronavirus_symptom,coronavirus_advisory_outbreak,work_employee_coronavirus_week,,calls_klopp_coronavirus_chief,risks_vaccine_coronavirus,journalism_coverage_coronavirus_time,person_case,"approach_30,000_coronavirus_case",ventilators_ventilator_pandemic_aid,coronavirus_use_cup,says_national_coronavirus_quarantine,lists_measure_coronavirus_tip,coronavirus_300_face,canceled_crisis_coronavirus_auto,coronavirus_shop_takeaway,struggling_coronavirus_farmer,help_spread_coronavirus_lung,stallone_production_coronavirus_pandemic,seeks_donation_coronavirus_brief,rise_coronavirus_weather,committee_spread_coronavirus_govt,delayed_wrench_coronavirus_production,explainer_coronavirus_guide,forces_beach_weekend_coronavirus,allocates_yuan_coronavirus_combat,production_iphone_coronavirus,says_impact_ass_review,coronavirus_care_immigrant,coronavirus_pantry_food_meal,shut_nightclubs_coronavirus_lockdown,asks_expert_science_solidarity,says_restriction_coronavirus_travel,hold_key_alpaca_news,conducted_job_interview,positive_doctor_woman_coronavirus,,,provide_update_health_official,thanks_layoffs_college_deposit,,understate_toll_count_estimate,,,fits_contour_crisis_world,gates_warrior_billionaire_fight,colleges_update_official_action,,reports_passenger,created_website_year_student,,sees_drop_death_month,,warn_skin_rash,
4,coronavirus_thrombus_case,reaches_official_death_coronavirus,confirms_case_coronavirus_police,newsletter_hospitalizations_coronavirus_outbreak,says_depression_coronavirus_economy,coronavirus_minority,deaths_death_coronavirus_week,extends_lockdown_coronavirus,coronavirus_gap_man,postponed_trial_coronavirus_threat,closes_school_capital_coronavirus,anxiety_symptom_coronavirus,colouring_campaign_coronavirus_handling,says_test_coronavirus,trend_drop_coronavirus_case,skip_gym_coronavirus_pandemic,breakthrough_death_coronavirus_steroid,trust_testing_coronavirus_evidence,approves_declaration_disaster_coronavirus,urges_people_coronavirus_pm,tests_positive_coronavirus_prison,records_case_coronavirus_province,cases_coronavirus_ph,needs_bed_coronavirus_hospital,nominated_player_coronavirus_issue,diagnosed_coronavirus_flight,communities_measures_coronavirus_death,rises_death_coronavirus,writes_letter_coronavirus_household,coronavirus_report_apocalypse,blocks_icke_coronavirus_video,create_shortage_fear_coronavirus,spread_administration_coronavirus_bill,ways_coronavirus_doctor,stay_way_coronavirus_message,expand_facilities_coronavirus_quarantine,plunge_toll_coronavirus_export,stranded_kit_coronavirus_cruise,announces_million_coronavirus_stock,talk_time_child_coronavirus,tests_positive_coronavirus_opera,queensland_concern_coronavirus_hotel,cause_pressure_coronavirus_blood,updates_infection_coronavirus_symptom,tests_vaccine_coronavirus_president,suspends_eviction_coronavirus_city,symptoms_symptom_coronavirus_virus,know_world_coronavirus_virus,coronavirus_engineer_police,,coronavirus_response_thing,developed_vaccine_coronavirus_scientist,tells_coronavirus_president,news_case_official,exceed_million_coronavirus_case,,coronavirus_glass_queue_drinker,speaks_official_coronavirus_ministry,adopts_measure_coronavirus_outbreak,wearing_mask_people_head,outlook_329_coronavirus_case,coronavirus_supermarket_worker,announces_billion_coronavirus_farmer,spread_case_person_coronavirus,robots_doctor_coronavirus_debut,announces_funding_coronavirus_fund,bring_reprieve_coronavirus_weather,prevent_coronavirus_government,ramps_production_mask_coronavirus,advice_patient_self_coronavirus,unveils_campaign_coronavirus_drowning,coronavirus_wuhan_returnee,surgeries_appointment_coronavirus_phone,,kills_victim_coronavirus_staffer,coronavirus_list_virus_bacteria,extends_curfew_coronavirus_case,,restrictions_test_coronavirus_travel,,,tested_coronavirus_woman,,,launches_information_health_organization,urges_trump_border,,predicts_thousand_hundred_death,,,spotted_lockdown_area,latest_organizer_protester_business,extended_support_site_reversal,,,,,,,suspended_outbreak,
5,case_case_coronavirus,reports_death_coronavirus_doctor,confirms_case_coronavirus_chief,reported_news_coronavirus_case,warns_recession_coronavirus,races_surge_case_coronavirus,coronavirus_holiday_update_briefing,life_lockdown_coronavirus,lessons_gravity_coronavirus_outbreak,canceled_coronavirus_twitchcon,confirms_case_coronavirus_school,sends_jitters_coronavirus_hospital,paintings_awareness_coronavirus_theme,kits_test_coronavirus,exceed_authority_coronavirus_case,plans_esport_coronavirus_sporting,trials_coronavirus_chloroquine,struggling_detail_coronavirus_misinformation,approves_patient_coronavirus_emergency,commits_500_coronavirus_arena,dies_coronavirus_inmate,nears_death_state_coronavirus,rise_case_coronavirus,coronavirus_case_hospital,performance_music_coronavirus_shutdown,suspend_flights_coronavirus_curb,rises_37_coronavirus_death,rise_lockdown_coronavirus_death,help_coronavirus_household,launches_alert_coronavirus_search,bans_gathering_coronavirus_week,apologizes_check_coronavirus_cash,block_bill_coronavirus_stimulus,apologizes_coronavirus_doctor_question,heightened_awareness_scam_coronavirus,coronavirus_facility_quarantine,halts_expansion_coronavirus_wave,coronavirus_update_cruise,invest_portfolio_coronavirus_investing,coronavirus_7_parent,tests_positive_employee_coronavirus,coronavirus_hotel_tourist,report_scare_year_virus,update_level_coronavirus_emergency,senator_test_coronavirus,flee_hospital_coronavirus,symptoms_coronavirus_detail,timeline_outbreak_coronavirus,coronavirus_decision_recruitment,,calls_staff_coronavirus_panic,progress_coronavirus_vaccine,force_trump_coronavirus_task,grow_case_death,coronavirus_capacity_case,,,says_case_coronavirus_doctor,measures_vulnerable_coronavirus_bank,report_mask_coronavirus,canceled_coronavirus_update,gets_pop_coronavirus_shop,workers_coronavirus_plant,spread_coronavirus_air,close_hall_coronavirus_cinema,pandemic_ps100bn_coronavirus_pension,coronavirus_climate,letting_blood_coronavirus_politician,announces_package_coronavirus_case,advice_approach_coronavirus_health,protests_beach_coronavirus_crisis,angers_china_cartoon_coronavirus,prepares_app_coronavirus_self,,says_offer_coronavirus_immigrant,suffers_loss_coronavirus_poultry,delays_mosque_night_coronavirus,,study_entry_coronavirus_country,,,confirmed_coronavirus_woman,,,,shut_ending_trump_vow,,rises_authority_toll,,,,,shifted_patient_college,,,,,,,immunity,
6,suggests_infection_coronavirus_case,announces_death_case_coronavirus,confirms_case_coronavirus_month,reported_case_coronavirus,crisis_economy_coronavirus_lockdown,discriminate_coronavirus_supremacist,coronavirus_eid_home,allowed_lockdown_coronavirus_week,faults_datum_coronavirus_spread,cancels_convention_coronavirus_spike,tested_coronavirus_student,panic_verdict_coronavirus_outbreak,coronavirus_soup_cartoon,coronavirus_kit_self,twists_death_coronavirus_case,coronavirus_spain_football_team,gets_drug_coronavirus_world,removes_coronavirus_misinformation,coronavirus_state_emergency,wishes_recovery_coronavirus_spokesman,released_coronavirus_prison,win_coronavirus_state,confirms_case_coronavirus_ph,coronavirus_head_hospital,coronavirus_cut_player,screened_abayomi_coronavirus_passenger,"died_40,000_coronavirus_case",coronavirus_death_aid,owner_stores_coronavirus,coronavirus_case_scientist,bans_coronavirus_pandemic,refuses_offer_coronavirus_conspiracy,hits_roadblock_coronavirus_bill,joined_battle_coronavirus_force,facebook_misinformation_coronavirus_virus,people_couple_coronavirus_family,arrive_rotation_coronavirus_postpone,coronavirus_antibody_sailor,tumble_demand_stock_coronavirus,dies_family_tripura_coronavirus,tests_positive_coronavirus_test,,coronavirus_swab_talk,watch_update_coronavirus_governor,refused_test_coronavirus_kid,spread_coronavirus_bathroom,symptoms_sign_coronavirus_warning,coronavirus_run_pace,says_coronavirus_employee_report,,holds_meeting_coronavirus_mathura,snag_vaccine_coronavirus,charged_president_coronavirus_blogger,confirms_case,coronavirus_million_case,,,says_pandemic_coronavirus_expert,eases_measure_coronavirus_weekend,face_restriction_coronavirus_region,cancels_licence_coronavirus_tv,closes_store_coronavirus_outbreak,limited_plant_coronavirus_workplace,human_person_coronavirus,filming_change_coronavirus_concern,assures_public_coronavirus_donation,,postpones_coronavirus_year_government,coronavirus_hand_impact_developer,warned_fauci_coronavirus,,,,,coronavirus_suicide_man_floor,soup_strain_bat_coronavirus,,,coronavirus_ban_travel_outbreak,,,coronavirus_study_woman,,,,,,,,,,,,,,,,,,,
7,case_case_coronavirus_health,dies_covid_person_coronavirus,confirms_case_coronavirus_report,reports_rise_coronavirus_death,coronavirus_potential_case_economy,infected_racism_coronavirus_opinion,coronavirus_case_hour,coronavirus_lockdown_district,slows_peak_coronavirus_expert,cancels_sxsw_coronavirus_fear,coronavirus_work_home_school,responds_fear_coronavirus_concern,crafting_wishlist_coronavirus_package,increased_test_coronavirus_testing,stopped_nat'l_coronavirus_case,tracker_coronavirus_athlete,coronavirus_fight_drug,waging_disinformation_coronavirus_distraction,factbox_state_coronavirus_emergency,pm_pakistanis_coronavirus_today,freed_coronavirus_offender,shortened_state_coronavirus_home,rise_infection_coronavirus_update,hospitalized_coronavirus_trouble,cash_billion_contestant_coronavirus,suspends_flight_coronavirus_outbreak,"says_51,000_death_coronavirus",dies_coronavirus_candidate,dealing_normal_coronavirus_home,coronavirus_elite_epicentre,bans_ads_mask_coronavirus,increases_inventory_coronavirus_supply,,recovered_coronavirus_hospitalisation,warns_australians_coronavirus_lockdown,casuals_hundred_coronavirus_isolation,ticks_coronavirus_clock_observation,update_ship_coronavirus_thousand,feds_price_coronavirus_trader,dies_death_coronavirus_infant,tests_positive_coronavirus_marouane,,sneaks_sense_smell_coronavirus,updates_immunity_recovery_coronavirus,cleared_coronavirus_bowler,light_darkness_coronavirus_battle,_coronavirus_symptom,creates_spread_pandemic_coronavirus,coronavirus_building_employee,,attendees_coronavirus_conference,widening_poll_coronavirus_vaccine,meets_force_task_coronavirus,stays_case_number,reach_peak_coronavirus_infection,,,says_vatican_coronavirus_case,planning_grade_coronavirus_measure,buy_mask_face_coronavirus,refund_coronavirus_holiday,,says_production_coronavirus,coronavirus_health,,offers_reward_coronavirus_antidote,,start_smart_coronavirus_official,rolls_effort_coronavirus_package,guide_sex_coronavirus_outbreak,,,,,buried_victim_coronavirus,bethlehem_mall_coronavirus_restaurant,,,spread_coronavirus_country,,,transmit_coronavirus_woman,,,,,,,,,,,,,,,,,,,
8,case_case_coronavirus_1st,confirms_case_coronavirus_death,confirms_case_president_coronavirus,records_death_coronavirus_patient,lawmaker_coronavirus_economy_factor,use_coronavirus_racism_excuse,announces_case_coronavirus_day,eases_lockdown_coronavirus_confusion,tumbles_coronavirus_impact,cancels_tour_coronavirus_birthday,universities_situation_coronavirus_concern,spreads_fear_coronavirus_contagion,coronavirus_fear_blue_art,says_testing_coronavirus_test,reports_arrests_coronavirus_case,suspends_production_wrestler_coronavirus,declares_coronavirus_treatment,check_hoax_coronavirus_fact,declared_emergency_health_death,holds_parade_coronavirus_spread,daughter_inmate_coronavirus,outbreak_canadians_coronavirus,rises_upsurge_coronavirus_case,case_service_coronavirus_hospital,lottery_coronavirus_outbreak,coronavirus_indians_plane_today,"surpassed_2,000_coronavirus_death",dies_die_coronavirus,hits_home_pool_coronavirus,coronavirus_help_update_evening,warrant_mongering_coronavirus_pandemic,impacting_industry_coronavirus_tech,,positive_coronavirus_patient,evacuate_australians_coronavirus_scam,serving_doorway_coronavirus_risk,opened_floodgate_package_coronavirus,quarantine_sailor_coronavirus_navy,stocks_breather_coronavirus_stock,newborn_baby_coronavirus,midfielder_positive_coronavirus_test,,report_symptom_coronavirus_student,"updates_335,000_coronavirus_case",,caves_seating_coronavirus_travel,tests_symptom_woman_coronavirus,live_coronavirus_outbreak,tells_administration_pilot_coronavirus,,attended_seminar_coronavirus_doctor,working_dose_coronavirus_vaccine,faces_crisis_coronavirus_presidency,,surpasses_case_coronavirus_day,,,says_coronavirus_championship,know_coronavirus_%,trolled_mask_coronavirus_hen,postponed_coronavirus_console,,,man_coronavirus,,raise_euro_coronavirus_research,,cuomo_policy_coronavirus_nursing,,offers_caution_coronavirus_psychologist,,,,,poverty_despair_coronavirus_pandemic,eating_infection_coronavirus_scientist,,,coronavirus_restriction_cotton_travel,,,dies_life_woman_coronavirus,,,,,,,,,,,,,,,,,,,
9,cases_case_coronavirus_ministry,reports_death_coronavirus_case,confirmed_case_coronavirus_virus,reports_case_coronavirus_hospital,,stop_spread_racism_coronavirus,cases_weekend_coronavirus,break_advice_coronavirus_lockdown,disconnections_cap_coronavirus_carrier,cancelled_day_coronavirus_fear,coronavirus_coronavirus_intern_youngster,gets_boost_coronavirus_fear,screen_question_coronavirus,tests_coronavirus_test,arrests_news_coronavirus_post,postponed_fear_coronavirus_football,warns_coronavirus_drug,remove_theory_coronavirus_conspiracy,declares_state_coronavirus_emergency,advises_fan_coronavirus,cases_coronavirus_prison,depends_regions_today_coronavirus,surge_jump_case_coronavirus,admitted_inquiry_coronavirus_hospital,musician_coronavirus_patient,screened_kaadha_people_coronavirus,"rises_1,769_coronavirus_death",death_death_coronavirus,,coronavirus_news_wrap,suspended_selfie_coronavirus,simplify_money_coronavirus_thing,,celebrate_recovery_coronavirus_hospital,whydoweneedaking_online_coronavirus_healthtech,cdc_screening_coronavirus_quarantine,ease_restriction_coronavirus_recession,met_coronavirus_cruise_passenger,buy_pharmaceuticals_stock_coronavirus,worry_coronavirus_baby_child,tests_positive_host_coronavirus,,coronavirus_maharashtra_team_watch,updates_wfh_coronavirus_place,,team_attack_coronavirus_county,,look_office_coronavirus_pandemic,continue_worker_coronavirus_outbreak,,barb_aide_coronavirus_leader,tackle_ps46_coronavirus_research,dies_coronavirus_president,,"surpass_100,000_coronavirus_case",,,says_coronavirus_outbreak,,releases_mask_coronavirus_face,,,,stumble_case_coronavirus_health,,un_fund_coronavirus_ministry,,coronavirus_parliament_business,,encouraging_student_coronavirus_field,,,,,want_epidemic_ihc_step,,,,,,,,,,,,,,,,,,,,,,,,,,


In [128]:
label_list_86 = Frames['Cluster 86'].tolist()
label_list_86[:50]

['reports_case_coronavirus_death',
 'dies_victim_coronavirus_doctor',
 'reports_death_coronavirus',
 'confirms_death_coronavirus',
 'reaches_official_death_coronavirus',
 'reports_death_coronavirus_doctor',
 'announces_death_case_coronavirus',
 'dies_covid_person_coronavirus',
 'confirms_case_coronavirus_death',
 'reports_death_coronavirus_case',
 'reports_death_coronavirus_person',
 'cleared_death_coronavirus_news',
 'confirmed_coronavirus_death',
 'announced_death_coronavirus',
 'dies_coronavirus_news',
 'updated_case_coronavirus_death',
 'announces_death_coronavirus_patient',
 'stories_talk_coronavirus_death',
 'cases_victim_coronavirus_death',
 'reports_death_coronavirus_life',
 'confirms_death_coronavirus_official',
 'confirm_death_coronavirus_health',
 'dies_coronavirus_patient',
 'confirms_death_coronavirus_patient',
 'died_victim_coronavirus_patient',
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 n

In [141]:
cluster_corpus = []

for i in label_list_86:
    if i != 'nan':
        pd_old = pd_all[pd_all['category']=='{}'.format(i)]['Processed_Title'].tolist()
        pd_new = [x for x in pd_old if x != 'nan']
        for item in pd_new:
            cluster_corpus.append(item)
        
        
cluster_corpus[:20] 


['Thailand Reports No New Coronavirus Cases Or Deaths',
 'Coronavirus live updates: 6th death in Texas reported in Dallas County',
 'Malaysia reports 123 new coronavirus cases, total at 1,306',
 'Two new coronavirus deaths, as national total hits 37',
 'Stanford reports undergraduate student has coronavirus',
 'Coronavirus latest: Thailand confirms six more cases',
 'CDC reports 690,714 coronavirus cases, 35,443 deaths',
 'CDC reports 1,571,617 coronavirus cases, 94,150 deaths',
 'Malaysia reports 179 new coronavirus cases and 4 more deaths',
 'Thailand confirms fifth case of new coronavirus',
 'Malaysia, Philippines Report Hundreds Of New Coronavirus Cases',
 'Officials confirm 5 cases of coronavirus in Houston area, including 4 in Harris County',
 'Malaysia reports 54 new coronavirus cases, 2 new deaths',
 'Thailand reports 50 new coronavirus cases, 1 new death',
 'Latest travel advice for Thailand as coronavirus outbreak kills 132 people',
 'First 2 cases of coronavirus in Harris Co

In [333]:
Frame_corpus = pd.DataFrame(cluster_corpus, columns=['Frame_Sentence'])
Frame_corpus.to_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//Frame86_Corpus.csv', index=False)

In [312]:
label_list_78 = Frames['Cluster 78'].tolist()
label_list_78[:50]

['coronavirus_market',
 'coronavirus_economy',
 'coronavirus_growth_economy_outbreak',
 'coronavirus_recession_economy',
 'says_depression_coronavirus_economy',
 'warns_recession_coronavirus',
 'crisis_economy_coronavirus_lockdown',
 'coronavirus_potential_case_economy',
 'lawmaker_coronavirus_economy_factor',
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan]

In [322]:
SubFrame_corpus = {}


for i in label_list_78:
        if i != 'nan':
            SubFrame_sents = pd_all[pd_all['category']=='{}'.format(i)]['Processed_Title'].tolist()
            
        SubFrame_corpus = {**SubFrame_corpus, **{"{}".format(i): SubFrame_sents}}
        
            
    
    
Sub_Corpus = pd.DataFrame.from_dict(SubFrame_corpus, orient = 'index')
Sub_Corpus = Sub_Corpus.T

Sub_Corpus.head()



Unnamed: 0,coronavirus_market,coronavirus_economy,coronavirus_growth_economy_outbreak,coronavirus_recession_economy,says_depression_coronavirus_economy,warns_recession_coronavirus,crisis_economy_coronavirus_lockdown,coronavirus_potential_case_economy,lawmaker_coronavirus_economy_factor,nan
0,Coronavirus Fears Spread: What Are The Market Implications ?,Latin America faces 'lost decade' from coronavirus GDP drop,Coronavirus To Hurt Economic Growth In Many Countries: Moodys,Coronavirus and the latest indicators for the UK economy and society: 4 June 2020,Trump says US economy could take hit from coronavirus,Fed Chair Powell warns of prolonged U.S. recession after coronavirus,Coronavirus crisis: Lockdown batters India's already slowing economy,CORONAVIRUS: Best and worst case scenarios for global economy,Iranian Lawmaker: Hang Those Who Knowingly Spread Coronavirus,
1,When The Coronavirus Outbreak Creates A Panic Buying Boom For Your Product,The economic fallout of coronavirus,Fed chairman: Coronavirus 'likely' to hinder economic growth in China,Coronavirus: Evening update as UK faces 'significant recession' BBC News,UPDATE 1 Trump says economy could take hit from coronavirus,"Coronavirus recession may already be here, market strategist says",Can extending India's lockdown stop coronavirus ?,GnS: Coronavirus Has The Potential To Trigger A Global Depression,"Iran's economy a factor in coronavirus response, President Rouhani says",
2,European yields extend falls as investors assess coronavirus impact,Coronavirus: Atwoli in plea for rescue of economy,Coronavirus Outbreak Could Damage China's Economy: Sri Mulyani,Coronavirus and the latest indicators for the UK economy and society: 18 June 2020,Can Trump Avoid a Post Coronavirus Great Depression ?,Global recession risks have risen due to coronavirus Moody's,Coronavirus: Is it too early to ease lockdown restrictions in India ? DW 01.06.2020,Coronavirus recession: How bad it could get and what it means for you,Iran to use 20% of state budget to fight coronavirus,
3,Commercial brokers say coronavirus not having a big impact yet,Coronavirus: Eight charts on how it has shaken economies,Wuhan coronavirus to have minimum effect on Ukraine's economy ministry,Singapore economy forecast to shrink 0.8% in 2020 from coronavirus hit: SP,Trump vows coronavirus ravaged economy will 'bounce' back,Recession Risks: Coronavirus + Oil Shock + What Else ?,India coronavirus lockdown day 6 ICMR denies community transmission,,,
4,Covid 19 Coronavirus: NZ stockmarket dips back on the rollercoaster,"The Economic Hit From Coronavirus Will Be Worst Since The Great Depression, IMF Warns",Why Coronavirus outbreak in China is bad news for economy,UK economy shrinks 2% in first quarter on coronavirus,Trump says economy could take hit from coronavirus,Big banks bracing for coronavirus to cause first recession in 29 years,,,,


In [334]:
Sub_Corpus.to_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//subframe78_Corpus.csv', index=False)
sub_frames = pd.read_csv('D://NLP//Frame_NLP//archive//Sent_Cluster//subframe78_Corpus.csv')

In [335]:
sub_frames.head()

Unnamed: 0,coronavirus_market,coronavirus_economy,coronavirus_growth_economy_outbreak,coronavirus_recession_economy,says_depression_coronavirus_economy,warns_recession_coronavirus,crisis_economy_coronavirus_lockdown,coronavirus_potential_case_economy,lawmaker_coronavirus_economy_factor,nan
0,Coronavirus Fears Spread: What Are The Market Implications ?,Latin America faces 'lost decade' from coronavirus GDP drop,Coronavirus To Hurt Economic Growth In Many Countries: Moodys,Coronavirus and the latest indicators for the UK economy and society: 4 June 2020,Trump says US economy could take hit from coronavirus,Fed Chair Powell warns of prolonged U.S. recession after coronavirus,Coronavirus crisis: Lockdown batters India's already slowing economy,CORONAVIRUS: Best and worst case scenarios for global economy,Iranian Lawmaker: Hang Those Who Knowingly Spread Coronavirus,
1,When The Coronavirus Outbreak Creates A Panic Buying Boom For Your Product,The economic fallout of coronavirus,Fed chairman: Coronavirus 'likely' to hinder economic growth in China,Coronavirus: Evening update as UK faces 'significant recession' BBC News,UPDATE 1 Trump says economy could take hit from coronavirus,"Coronavirus recession may already be here, market strategist says",Can extending India's lockdown stop coronavirus ?,GnS: Coronavirus Has The Potential To Trigger A Global Depression,"Iran's economy a factor in coronavirus response, President Rouhani says",
2,European yields extend falls as investors assess coronavirus impact,Coronavirus: Atwoli in plea for rescue of economy,Coronavirus Outbreak Could Damage China's Economy: Sri Mulyani,Coronavirus and the latest indicators for the UK economy and society: 18 June 2020,Can Trump Avoid a Post Coronavirus Great Depression ?,Global recession risks have risen due to coronavirus Moody's,Coronavirus: Is it too early to ease lockdown restrictions in India ? DW 01.06.2020,Coronavirus recession: How bad it could get and what it means for you,Iran to use 20% of state budget to fight coronavirus,
3,Commercial brokers say coronavirus not having a big impact yet,Coronavirus: Eight charts on how it has shaken economies,Wuhan coronavirus to have minimum effect on Ukraine's economy ministry,Singapore economy forecast to shrink 0.8% in 2020 from coronavirus hit: SP,Trump vows coronavirus ravaged economy will 'bounce' back,Recession Risks: Coronavirus + Oil Shock + What Else ?,India coronavirus lockdown day 6 ICMR denies community transmission,,,
4,Covid 19 Coronavirus: NZ stockmarket dips back on the rollercoaster,"The Economic Hit From Coronavirus Will Be Worst Since The Great Depression, IMF Warns",Why Coronavirus outbreak in China is bad news for economy,UK economy shrinks 2% in first quarter on coronavirus,Trump says economy could take hit from coronavirus,Big banks bracing for coronavirus to cause first recession in 29 years,,,,


## Construction Pattern Generator

In [None]:
example_category = data_clustered[data_clustered['label_st1']==31].reset_index(drop=True)
example_category 

In [144]:
corpus = ['CDC reports 3,416,428 coronavirus cases',
 'U.S. coronavirus cases surpass 2.5 million: Reuters tally',
 'U.S. CDC reports 2,459,472 coronavirus cases',
 'U.S. CDC reports 4,024,492 coronavirus cases',
 'U.S. CDC reports 3,416,428 coronavirus cases',
 'U.S. surpasses 4 million confirmed coronavirus cases',
 'US hits 2.5M coronavirus cases as states tally record one day highs',
 "US' coronavirus tally crosses 2.5 million mark, toll at 125,480"]


In [72]:
corpus = pd_cluster[pd_cluster['category']=='postponed_anniversary_coronavirus_fear']['Processed_Title'].tolist()
corpus[:20]

['Cinderella Musical Postponed Due to Coronavirus Fears',
 "Coronavirus: Celebrations to mark 3rd anniversary of BJP govt in U'khand postponed",
 "'Bollywood Oscars' called off over coronavirus fears",
 'India EU summit postponed due to coronavirus',
 'Skysports awards suffers coronavirus postponement',
 'Oscars postponed due to coronavirus scourge',
 "Aamir Khan 'cancels' birthday celebrations amid coronavirus scare",
 'Coronavirus in India live updates: Attari Wagah border ceremony suspended',
 'LA Fashion Week delayed due to coronavirus',
 'Upcoming Padma Awards ceremony postponed in wake of coronavirus outbreak',
 'Peabody Awards Postponed Due to Coronavirus Pandemic',
 'Coronavirus in India Wagah beating retreat ceremony suspended',
 'Coronavirus: Celebrations to mark 3rd anniversary of BJP govt in Uttarakhand postponed',
 'Film academy delays 2021 Oscars ceremony because of coronavirus CBC.ca']

In [73]:
corpus_embeddings = embedder.encode(corpus)
# Normalize the embeddings to unit length
corpus_embeddings = corpus_embeddings /  np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

from sklearn.metrics.pairwise import cosine_similarity

similarities = cosine_similarity(
    [corpus_embeddings[0]],
    corpus_embeddings[1:]
)
similarities

array([[0.43334538, 0.58618796, 0.5104109 , 0.4702193 , 0.66981226,
        0.36607003, 0.44138527, 0.5668489 , 0.5606787 , 0.6399028 ,
        0.44455236, 0.43352965, 0.58175826]], dtype=float32)

In [74]:
simi_list = similarities[0].tolist()
simi_list

[0.43334537744522095,
 0.5861879587173462,
 0.5104109048843384,
 0.47021931409835815,
 0.6698122620582581,
 0.366070032119751,
 0.44138526916503906,
 0.566848874092102,
 0.5606787204742432,
 0.6399027705192566,
 0.44455236196517944,
 0.43352964520454407,
 0.5817582607269287]

In [156]:
#example_doc = nlp(list(example_category['Processed_Title'])[1])

for i in corpus:
    example_doc = nlp(i)

    #print(f'{example_doc}\n')

    #for token in example_doc:
        #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_ , token.is_stop)

example_doc

US' coronavirus tally crosses 2.5 million mark, toll at 125,480

In [75]:
options = {"compact": True, "color":"green", "arrow_width":8, "arrow_spacing":12, 
           "word_spacing":15, "distance":150, "collapse_phrases":False}
simi_list = similarities[0].tolist()

for i in range(1):
    doc = nlp(corpus[i])
        
for i in range(1):
    print(corpus[i])
    displacy.render(doc, style="dep", options=options)

print('--------------------------------------------------------------------------')

for n in range(len(corpus)-1):
    i = simi_list[n-1]
    x = corpus[n+1]
    doc = nlp(x)
    

    print('{}\t\t{}'.format(x, i))
    displacy.render(doc, style="dep", options=options)
    print('--------------------------------------------------------------------------')
    

Cinderella Musical Postponed Due to Coronavirus Fears


--------------------------------------------------------------------------
Coronavirus: Celebrations to mark 3rd anniversary of BJP govt in U'khand postponed		0.5817582607269287


--------------------------------------------------------------------------
'Bollywood Oscars' called off over coronavirus fears		0.43334537744522095


--------------------------------------------------------------------------
India EU summit postponed due to coronavirus		0.5861879587173462


--------------------------------------------------------------------------
Skysports awards suffers coronavirus postponement		0.5104109048843384


--------------------------------------------------------------------------
Oscars postponed due to coronavirus scourge		0.47021931409835815


--------------------------------------------------------------------------
Aamir Khan 'cancels' birthday celebrations amid coronavirus scare		0.6698122620582581


--------------------------------------------------------------------------
Coronavirus in India live updates: Attari Wagah border ceremony suspended		0.366070032119751


--------------------------------------------------------------------------
LA Fashion Week delayed due to coronavirus		0.44138526916503906


--------------------------------------------------------------------------
Upcoming Padma Awards ceremony postponed in wake of coronavirus outbreak		0.566848874092102


--------------------------------------------------------------------------
Peabody Awards Postponed Due to Coronavirus Pandemic		0.5606787204742432


--------------------------------------------------------------------------
Coronavirus in India Wagah beating retreat ceremony suspended		0.6399027705192566


--------------------------------------------------------------------------
Coronavirus: Celebrations to mark 3rd anniversary of BJP govt in Uttarakhand postponed		0.44455236196517944


--------------------------------------------------------------------------
Film academy delays 2021 Oscars ceremony because of coronavirus CBC.ca		0.43352964520454407


--------------------------------------------------------------------------


In [65]:
options = {"compact": True, "color":"green", "arrow_width":8, "arrow_spacing":12, 
           "word_spacing":15, "distance":150, "collapse_phrases":False}
simi_list = similarities[0].tolist()

for i in range(1):
    doc = nlp(corpus[i])
        
for i in range(1):
    print(corpus[i])
    

print('--------------------------------------------------------------------------')

for n in range(len(corpus)-1):
    i = simi_list[n-1]
    x = corpus[n+1]
    doc = nlp(x)
    

    print('{}\t\t{}'.format(x, i))
    
    print('--------------------------------------------------------------------------')
    

Australian scientists grow copy of coronavirus in lab, called 'significant breakthrough'
--------------------------------------------------------------------------
Australian Scientists First to Grow Novel Coronavirus Outside China		0.6825590133666992
--------------------------------------------------------------------------
Global breakthrough as Australian lab grows coronavirus		0.8677197694778442
--------------------------------------------------------------------------
Aust grown coronavirus: why it's important		0.9021153450012207
--------------------------------------------------------------------------
'We got it': The game changing moment when this Australian lab recreated coronavirus		0.811951756477356
--------------------------------------------------------------------------
Aust scientists grow coronavirus in lab		0.8157276511192322
--------------------------------------------------------------------------
Scientists in Australia recreate coronavirus in potential 'game change

## Concept Entity Relationship Generator

## Noun Chuncks Extractor

## Frame Aspects Summarizer

## NER Tagger

## Lexical Units Extender

In [None]:
# Word2Vec

## Most Frequent Words by POS

In [3]:
with open("D://NLP//Frame_NLP//archive//covid19_title.csv", encoding='utf8') as f:
    text = f.read().replace("\n\n", " ").replace("\n", " ")
doc = nlp(text)
sentences = list(doc.sents)
from collections import Counter
verbs = [ token.text for token in doc if token.is_stop != True and token.is_punct !=True and token.pos_ == 'VERB']
nouns = [ token.text for token in doc if token.is_stop != True and token.is_punct !=True and token.pos_ == 'NOUN']
adjs = [ token.text for token in doc if token.is_stop != True and token.is_punct !=True and token.pos_ == 'ADJ']
advs = [ token.text for token in doc if token.is_stop != True and token.is_punct !=True and token.pos_ == 'ADV']
print(Counter(verbs).most_common(20))
print(Counter(nouns).most_common(20))
print(Counter(adjs).most_common(20))
print(Counter(advs).most_common(20))

[('says', 42), ('confirms', 28), ('records', 24), ('tests', 22), ('fight', 19), ('dies', 14), ('rises', 13), ('hits', 11), ('announces', 11), ('reports', 11), ('confirmed', 11), ('urges', 10), ('stay', 10), ('warns', 9), ('reveals', 9), ('work', 9), ('reopen', 9), ('Says', 9), ('Help', 8), ('donates', 8)]
[('coronavirus', 186), ('cases', 122), ('outbreak', 53), ('patients', 35), ('lockdown', 29), ('death', 29), ('Cases', 27), ('deaths', 25), ('virus', 23), ('PM', 22), ('govt', 20), ('toll', 19), ('test', 19), ('pandemic', 17), ('hospital', 15), ('case', 14), ('masks', 14), ('state', 13), ('patient', 13), ('day', 13)]
[('new', 74), ('positive', 46), ('COVID19', 40), ('total', 21), ('social', 15), ('second', 11), ('old', 9), ('medical', 8), ('negative', 5), ('public', 5), ('low', 5), ('pandemic', 5), ('innovative', 4), ('single', 4), ('Spanish', 4), ('daily', 4), ('global', 4), ('Indian', 4), ('New', 4), ('federal', 4)]
[('far', 4), ('hard', 4), ('worldwide', 4), ('latest', 4), ('away', 

## Noun Chunks

In [11]:
chunks = (list(doc.noun_chunks))
for chunk in chunks:
    if "social" in str(chunk):
        print (chunk)

social distancing
'inappropriate' social media posts
social stock exchanges
social media
virus social distancing
social distancing
social distancing
social distancing
social distancing
federal social distancing guidelines
the last major social platforms
social distancing
just a social movement
