In [None]:
import pandas as pd
import re
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Make sure the NLTK stop-word corpus is available before it is read.
nltk.download('stopwords')
from nltk.corpus import stopwords

# English stop words, held in a set so membership checks are cheap.
stop_words = {*stopwords.words('english')}

# Location of the scraped posts (Google Drive mount path).
file_path = '/content/drive/MyDrive/499 dataset/BlackLivesMatter.csv'
data = pd.read_csv(file_path)

# Missing posts are read as NaN; casting NaN straight to str would yield the
# literal word 'nan' and inject it into the corpus as a real token.  Replace
# missing values with an empty string first, then coerce everything to str.
data['posts'] = data['posts'].fillna('').astype(str)

def preprocess_text(text):
    """Normalize one post for keyword matching.

    Every non-word character is replaced by a space, the result is
    lower-cased, and tokens present in the module-level ``stop_words``
    set are discarded.

    Returns the remaining tokens joined by single spaces.
    """
    normalized = re.sub(r'\W', ' ', text).lower()
    kept_tokens = [token for token in normalized.split() if token not in stop_words]
    return ' '.join(kept_tokens)

# Cleaned version of every post, stored next to the raw text.
data['processed_text'] = data['posts'].map(preprocess_text)


# Vocabulary behind the 'Political_Nature' score.  Each entry is listed once.
political_keywords = {
    'government', 'policy', 'election', 'politics', 'senate', 'congress', 'democracy', 'republican', 'democrat',
    'voting', 'campaign', 'legislation', 'constitution', 'parliament', 'governance', 'candidate', 'ballot', 'lawmaker',
    'bureaucracy', 'federal', 'state', 'local', 'political party', 'politician', 'lobby', 'diplomacy', 'diplomat',
    'foreign policy', 'domestic policy', 'executive', 'judicial', 'legislative', 'administration', 'cabinet', 'minister',
    'secretary', 'public office', 'public servant', 'civil servant', 'referendum', 'initiative', 'reform', 'law', 'legal',
    'justice', 'regulation', 'compliance', 'jurisdiction', 'litigation', 'arbitration', 'mediation', 'advocacy', 'activism',
    'protest', 'rally', 'march', 'demonstration', 'strike', 'boycott', 'petition', 'grassroots', 'movement', 'rights',
    'freedom', 'liberty', 'equality', 'oppression', 'repression', 'resistance', 'revolution', 'insurrection',
    'coup', 'rebellion', 'uprising', 'insurgency', 'autocracy', 'dictatorship', 'tyranny', 'authoritarianism', 'totalitarianism',
    'monarchy', 'republic', 'federalism', 'centralism', 'decentralization', 'devolution', 'secession', 'independence',
    'sovereignty', 'nation', 'country', 'territory', 'socialism', 'communism', 'capitalism', 'liberalism', 'conservatism',
    'progressivism', 'libertarianism', 'anarchism', 'populism', 'nationalism', 'patriotism', 'neoliberalism', 'neoconservatism',
    'environmentalism', 'feminism', 'multiculturalism', 'globalism', 'protectionism', 'isolationism', 'imperialism', 'colonialism',
    'industrialism', 'postindustrialism', 'urbanism', 'ruralism', 'localism', 'regionalism', 'globalization', 'privatization',
    'nationalization', 'centralization', 'modernization', 'traditionalism', 'radicalism', 'reformism',
    'revolutionism', 'reactionism', 'extremism', 'fundamentalism', 'militarism', 'pacifism', 'humanitarianism', 'secularism',
    'religious fundamentalism', 'islamism', 'christian fundamentalism', 'zionism', 'hindu nationalism', 'buddhist nationalism',
    'atheism', 'agnosticism', 'spiritualism', 'pantheism', 'deism', 'transhumanism', 'posthumanism', 'cybernetics', 'biopolitics',
    'genetics', 'climate change', 'sustainability', 'renewable energy', 'green technology', 'urbanization', 'smart cities',
    'public policy', 'public health', 'social policy', 'economic policy', 'foreign aid', 'human rights', 'civil rights',
    'animal rights', 'privacy rights', 'intellectual property rights', 'land rights', 'water rights', 'resource rights',
    'women rights', 'LGBTQ rights', 'disability rights', 'indigenous rights', 'workers rights', 'consumer rights',
    'patient rights', 'immigrant rights', 'refugee rights', 'asylum rights', 'privacy', 'security', 'surveillance', 'intelligence',
    'cybersecurity', 'terrorism', 'counterterrorism', 'war', 'peace', 'conflict', 'resolution', 'negotiation',
    'sanctions', 'trade', 'tariffs', 'embargoes', 'economic growth', 'economic development', 'poverty alleviation', 'welfare',
    'social security', 'healthcare', 'education', 'housing', 'transportation', 'infrastructure', 'telecommunications', 'technology',
    'innovation', 'research', 'development', 'science', 'engineering', 'mathematics', 'arts', 'humanities', 'sports', 'recreation',
    'tourism', 'travel', 'culture', 'heritage', 'tradition', 'custom', 'community', 'society', 'values', 'beliefs', 'identity',
    'language', 'religion', 'faith', 'spirituality', 'mythology', 'ritual', 'ceremony', 'festival', 'celebration', 'art', 'music',
    'dance', 'theater', 'literature', 'poetry', 'prose', 'storytelling', 'folklore', 'craft', 'handicraft', 'architecture',
    'monument', 'landmark', 'historical site', 'museum', 'archive', 'library', 'exhibition', 'gallery', 'performance', 'film',
    'cinema', 'media', 'journalism', 'publishing', 'broadcasting', 'television', 'radio', 'internet', 'digital media', 'social media',
    'networking', 'communication', 'interaction', 'dialogue', 'discourse', 'debate', 'discussion', 'conversation', 'meeting',
    'assembly', 'conference', 'seminar', 'workshop', 'training', 'learning', 'knowledge', 'wisdom', 'understanding',
    'insight', 'awareness', 'perception', 'perspective', 'viewpoint', 'opinion', 'belief', 'conviction', 'credo', 'dogma',
    'ideology', 'philosophy', 'worldview', 'principles', 'standards', 'morals', 'ethics', 'integrity', 'honor',
}

# Vocabulary behind the 'Counter_Movement_Activity' score.
counter_movement_keywords = {
    'all lives matter', 'blue lives matter', 'antifa', 'back the blue', 'police lives matter', 'right wing', 'conservative',
    'nationalist', 'patriot', 'alt-right', 'far-right', 'neo-nazi', 'white supremacy', 'racism', 'racialist', 'anti-immigrant',
    'anti-refugee', 'anti-diversity', 'segregation', 'ethnocentrism', 'nativism', 'xenophobia', 'islamophobia', 'homophobia',
    'transphobia', 'heteronormativity', 'reactionary', 'retrogressive', 'traditionalist', 'fundamentalist', 'ultranationalist',
    'national socialism', 'fascism', 'authoritarianism', 'dictatorship', 'totalitarianism', 'monarchy', 'oligarchy', 'plutocracy',
    'corporatism', 'capitalism', 'libertarianism', 'anarcho-capitalism', 'objectivism', 'individualism', 'collectivism',
    'statism', 'centralism', 'decentralization', 'devolution', 'secession', 'independence', 'sovereignty', 'nation', 'state',
    'country', 'territory', 'border control', 'immigration', 'emigration', 'citizenship', 'national identity', 'ethnicity',
    'race', 'culture', 'heritage', 'tradition', 'custom', 'society', 'values', 'beliefs', 'identity', 'patriotism', 'loyalty',
    'allegiance', 'fidelity', 'devotion', 'faith', 'creed', 'dogma', 'ideology', 'worldview', 'philosophy', 'principles',
    'standards', 'morals', 'ethics', 'integrity', 'honor', 'dignity', 'pride',
}

# Vocabulary behind the 'Cultural_Relevance' score.  Each entry is listed once.
cultural_keywords = {
    'community', 'culture', 'heritage', 'tradition', 'custom', 'society', 'values', 'beliefs', 'identity', 'language',
    'religion', 'faith', 'spirituality', 'mythology', 'ritual', 'ceremony', 'festival', 'celebration', 'art', 'music',
    'dance', 'theater', 'literature', 'poetry', 'prose', 'storytelling', 'folklore', 'craft', 'handicraft', 'architecture',
    'monument', 'landmark', 'historical site', 'museum', 'archive', 'library', 'exhibition', 'gallery', 'performance',
    'film', 'cinema', 'media', 'journalism', 'publishing', 'broadcasting', 'television', 'radio', 'internet', 'digital media',
    'social media', 'networking', 'communication', 'interaction', 'dialogue', 'discourse', 'debate', 'discussion',
    'conversation', 'meeting', 'assembly', 'conference', 'seminar', 'workshop', 'training', 'education', 'learning',
    'knowledge', 'wisdom', 'understanding', 'insight', 'awareness', 'perception', 'perspective', 'viewpoint', 'opinion',
    'belief', 'conviction', 'credo', 'dogma', 'ideology', 'philosophy', 'worldview', 'principles',
    'standards', 'morals', 'ethics', 'integrity', 'honor', 'justice', 'fairness', 'equality', 'liberty', 'freedom',
    'rights', 'responsibilities', 'duties', 'obligations', 'accountability', 'transparency', 'trust', 'loyalty', 'allegiance',
    'patriotism', 'nationalism', 'cosmopolitanism', 'multiculturalism', 'diversity', 'inclusion', 'equity',
    'pluralism', 'integration', 'assimilation', 'acculturation', 'biculturalism', 'cross-cultural',
    'intercultural', 'transcultural', 'globalization', 'localization', 'glocalization', 'hybridity', 'syncretism',
    'diaspora', 'migration', 'immigration', 'emigration', 'refugees', 'asylum seekers', 'expatriates', 'tourism', 'travel',
    'exploration', 'adventure', 'discovery', 'pilgrimage', 'voyage', 'journey', 'odyssey', 'expedition', 'trek',
    'safari', 'cruise', 'excursion', 'outing', 'trip', 'holiday', 'vacation', 'retreat', 'resort', 'residency',
    'habitat', 'dwelling', 'home', 'household', 'family', 'kinship', 'lineage', 'clan', 'tribe', 'ethnicity',
    'race', 'gender', 'sexuality', 'orientation', 'expression', 'roles', 'relations', 'patriarchy',
    'matriarchy', 'egalitarianism', 'class', 'status', 'stratification', 'mobility', 'opportunity', 'privilege',
    'power', 'authority', 'domination', 'oppression', 'resistance', 'revolution', 'rebellion', 'insurrection',
    'uprising', 'coup', 'war', 'conflict', 'violence', 'peace', 'reconciliation', 'resolution', 'mediation', 'negotiation',
    'diplomacy', 'cooperation', 'collaboration', 'partnership', 'alliance', 'solidarity', 'unity',
    'group', 'organization', 'institution', 'association', 'union', 'guild', 'network', 'movement', 'campaign',
    'initiative', 'project', 'program', 'plan', 'strategy', 'policy', 'procedure', 'process', 'protocol', 'system',
    'structure', 'framework', 'model', 'paradigm', 'theory', 'concept', 'idea', 'innovation', 'invention',
    'creation', 'design', 'development', 'evolution', 'progress', 'change', 'transformation', 'reform', 'renewal',
    'renaissance', 'revival', 'resurgence',
    'observance', 'anniversary', 'jubilee', 'commemoration', 'memorial', 'tribute', 'homage',
    'respect', 'admiration', 'reverence', 'worship', 'praise', 'thanksgiving', 'gratitude', 'appreciation',
    'recognition', 'acknowledgment', 'validation', 'endorsement', 'support', 'encouragement', 'motivation', 'inspiration',
    'aspiration', 'ambition', 'goal', 'dream', 'vision', 'hope', 'wish', 'desire', 'yearning', 'longing', 'passion',
    'enthusiasm', 'zeal', 'vigor', 'energy', 'drive', 'determination', 'perseverance', 'resilience', 'tenacity',
    'fortitude', 'courage', 'bravery', 'boldness', 'daring', 'adventurousness', 'enterprise',
    'creativity', 'imagination', 'ingenuity', 'resourcefulness', 'originality', 'novelty', 'uniqueness', 'individuality',
    'distinctiveness', 'character', 'personality', 'self', 'soul', 'spirit', 'essence', 'core', 'heart',
    'mind', 'intellect', 'reason', 'logic', 'rationality',
    'consciousness', 'observation', 'experience', 'memory', 'remembrance', 'recall',
    'comprehension', 'interpretation', 'analysis', 'evaluation',
    'judgment', 'decision', 'choice', 'selection', 'preference', 'bias', 'prejudice', 'stereotype', 'discrimination',
    'bigotry', 'intolerance', 'hate', 'anger', 'fear', 'anxiety', 'worry', 'concern', 'doubt', 'uncertainty',
    'confusion', 'bewilderment', 'puzzlement', 'curiosity', 'interest', 'fascination', 'intrigue', 'wonder',
    'amazement', 'awe', 'thanks', 'thankfulness',
    'gratefulness',
}

# Vocabulary behind the 'Legality_Nature' score.  Each entry is listed once.
legality_keywords = {
    'law', 'legal', 'court', 'rights', 'justice', 'legislation', 'regulation', 'compliance', 'jurisdiction', 'litigation',
    'arbitration', 'mediation', 'advocacy', 'activism', 'protest', 'rally', 'march', 'demonstration', 'strike', 'boycott',
    'petition', 'grassroots', 'movement', 'civil rights', 'human rights', 'freedom', 'liberty', 'equality', 'fairness',
    'impartiality', 'objectivity', 'neutrality', 'due process', 'rule of law', 'legal system', 'judicial system', 'court system',
    'criminal law', 'civil law', 'common law', 'statutory law', 'constitutional law', 'administrative law', 'international law',
    'humanitarian law', 'environmental law', 'corporate law', 'commercial law', 'contract law', 'labor law', 'employment law',
    'family law', 'immigration law', 'intellectual property law', 'property law', 'tax law', 'tort law', 'trust law', 'welfare law',
    'health law', 'education law', 'consumer law', 'banking law', 'financial law', 'securities law', 'antitrust law', 'competition law',
    'communications law', 'energy law', 'transportation law', 'maritime law', 'aerospace law', 'defense law', 'cyber law',
    'data protection law', 'privacy law', 'information law', 'media law', 'entertainment law', 'sports law', 'gaming law',
    'food law', 'drug law', 'alcohol law', 'tobacco law', 'firearms law', 'explosives law', 'chemical law', 'biological law',
    'nuclear law', 'radiation law', 'environmental protection law', 'conservation law', 'wildlife law', 'animal law', 'agricultural law',
    'land use law', 'zoning law', 'urban planning law', 'rural law', 'indigenous law', 'tribal law', 'military law', 'martial law',
    'emergency law', 'crisis law', 'war law', 'peace law', 'disarmament law', 'arms control law', 'weapons law', 'terrorism law',
    'security law', 'surveillance law', 'intelligence law', 'espionage law', 'counterterrorism law', 'counterintelligence law',
    'extradition law', 'asylum law', 'refugee law', 'migration law', 'citizenship law', 'nationality law', 'police law', 'public safety law',
    'fire law', 'emergency medical services law', 'health and safety law', 'occupational safety law', 'workplace safety law',
    'insurance law', 'pension law', 'retirement law', 'disability law', 'workers compensation law', 'veterans law', 'social security law',
    'public benefits law', 'housing law', 'homelessness law', 'landlord-tenant law', 'real estate law', 'mortgage law', 'foreclosure law',
    'bankruptcy law', 'debt collection law', 'credit law', 'usury law', 'banking regulation', 'financial regulation', 'securities regulation',
    'corporate governance law', 'mergers and acquisitions law', 'competition regulation', 'antitrust regulation', 'trade law', 'customs law',
    'tariff law', 'import-export law', 'international trade law', 'investment law', 'taxation law', 'international taxation law', 'tax compliance law',
    'tax avoidance law', 'tax evasion law', 'tax planning law', 'tax litigation law', 'tax fraud law', 'criminal procedure law', 'civil procedure law',
    'evidence law', 'proof law', 'burden of proof law', 'standard of proof law', 'presumption law', 'inference law', 'deduction law', 'induction law',
    'analogy law', 'interpretation law', 'construction law', 'application law', 'enforcement law', 'implementation law', 'execution law', 'performance law',
    'breach law', 'remedies law', 'damages law', 'compensation law', 'restitution law', 'equity law', 'injunction law', 'specific performance law',
    'declaratory judgment law', 'rescission law', 'reformation law', 'rectification law', 'accounting law', 'trusts law', 'fiduciary law', 'trusteeship law',
    'beneficiary law', 'trust instrument law', 'trust deed law', 'trust agreement law', 'testamentary trust law', 'inter vivos trust law', 'discretionary trust law',
    'charitable trust law', 'spendthrift trust law', 'constructive trust law', 'resulting trust law', 'secret trust law', 'implied trust law', 'express trust law',
    'trustee duties law', 'trustee powers law', 'trustee liabilities law', 'trustee responsibilities law', 'trustee rights law', 'trustee obligations law',
    'trustee protections law', 'trustee immunities law', 'trustee defenses law', 'trustee claims law', 'trustee actions law', 'trustee proceedings law',
    'trustee disputes law', 'trustee settlements law', 'trustee resolutions law', 'trustee judgments law', 'trustee orders law', 'trustee decisions law',
    'trustee applications law', 'trustee submissions law', 'trustee evidence law', 'trustee testimony law', 'trustee arguments law', 'trustee hearings law',
    'trustee trials law', 'trustee appeals law', 'trustee reviews law', 'trustee assessments law', 'trustee evaluations law', 'trustee investigations law',
    'trustee audits law', 'trustee reports law', 'trustee records law', 'trustee accounts law', 'trustee funds law', 'trustee assets law',
    'trustee expenditures law', 'trustee revenues law', 'trustee profits law', 'trustee losses law', 'trustee investments law', 'trustee disbursements law',
    'trustee distributions law', 'trustee payments law', 'trustee receipts law', 'trustee transactions law', 'trustee transfers law',
    'trustee adjustments law', 'trustee corrections law', 'trustee errors law', 'trustee frauds law', 'trustee misrepresentations law', 'trustee omissions law',
    'trustee breaches law', 'trustee defaults law', 'trustee negligence law', 'trustee misconduct law', 'trustee malfeasance law', 'trustee nonfeasance law',
    'trustee misfeasance law',
    'legal precedent', 'case law', 'stare decisis', 'binding precedent',
    'persuasive precedent', 'analogical reasoning', 'distinguishing', 'overruling', 'reversing', 'modifying', 'following', 'disapproving', 'criticizing', 'extending', 'limiting', 'refining', 'clarifying', 'confirming', 'approving',
    'endorsing', 'adopting', 'applying', 'enforcing', 'implementing', 'executing', 'observing', 'respecting', 'complying', 'violating', 'breaching', 'transgressing', 'offending', 'contravening', 'infringing', 'invading', 'encroaching',
}

def calculate_score(text, keywords):
    """Return the share of distinct words in *text* covered by *keywords*.

    A distinct word counts as covered when it is itself a keyword, or when
    it occurs inside an occurrence of a multi-word keyword (for example
    ``'civil rights'``) that appears in *text* as consecutive tokens.

    Args:
        text: Whitespace-separated tokens.  Matching is exact, so *text* and
            *keywords* must share the same normalization (case, punctuation,
            stop-word removal); keywords that differ from the text in any of
            these respects will not match.
        keywords: Iterable of keyword strings; an entry containing
            whitespace is treated as a phrase.

    Returns:
        ``covered distinct words / distinct words`` as a float in [0, 1],
        or ``0`` when *text* contains no tokens.
    """
    tokens = text.split()
    vocabulary = set(tokens)
    if not vocabulary:
        return 0

    single_words = set()
    phrases_by_first_word = {}
    for keyword in keywords:
        parts = keyword.split()
        if len(parts) == 1:
            single_words.add(parts[0])
        elif parts:
            # Index phrases by their first token so the scan below only
            # compares windows that can actually start a phrase.
            phrases_by_first_word.setdefault(parts[0], []).append(parts)

    covered = vocabulary & single_words
    if phrases_by_first_word:
        for position, token in enumerate(tokens):
            for phrase in phrases_by_first_word.get(token, ()):
                if tokens[position:position + len(phrase)] == phrase:
                    covered.update(phrase)

    return len(covered) / len(vocabulary)

# The corpus is scored as a whole: all processed posts are concatenated into
# one space-separated string.
all_text = ' '.join(data['processed_text'].tolist())

# Percentage score per category, in the order they appear in the report.
overall_scores = {
    label: calculate_score(all_text, category_keywords) * 100
    for label, category_keywords in (
        ('Political_Nature', political_keywords),
        ('Counter_Movement_Activity', counter_movement_keywords),
        ('Cultural_Relevance', cultural_keywords),
        ('Legality_Nature', legality_keywords),
    )
}

# Bag-of-words counts for the single concatenated document.  Note that
# `vectorizer` holds the transformed count matrix, not the CountVectorizer.
vectorizer = CountVectorizer().fit_transform([all_text])
vectors = vectorizer.toarray()
# Only one document is supplied, so this is a 1x1 matrix (the document
# compared with itself); nothing in the code shown here reads it afterwards.
cosine_similarities = cosine_similarity(vectors)

print("Overall NLP Analysis Scores:", overall_scores, sep="\n")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Overall NLP Analysis Scores:
{'Political_Nature': 3.4411276948590386, 'Counter_Movement_Activity': 0.7048092868988391, 'Cultural_Relevance': 4.601990049751244, 'Legality_Nature': 0.5804311774461027}
