# Mapping Sections To Categories

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#How-Many-NULL-Section-for-2018-2020?" data-toc-modified-id="How-Many-NULL-Section-for-2018-2020?-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>How Many NULL Section for 2018-2020?</a></span><ul class="toc-item"><li><span><a href="#If-we-drop-the-NULL-Sections,-how-many-per-publication-remains?" data-toc-modified-id="If-we-drop-the-NULL-Sections,-how-many-per-publication-remains?-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>If we drop the NULL Sections, how many per publication remains?</a></span></li></ul></li><li><span><a href="#Generalizing-the-Categories-For-Non-Null-Sections" data-toc-modified-id="Generalizing-the-Categories-For-Non-Null-Sections-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Generalizing the Categories For Non-Null Sections</a></span><ul class="toc-item"><li><span><a href="#Mapping-Categories-for-Reuters" data-toc-modified-id="Mapping-Categories-for-Reuters-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Mapping Categories for Reuters</a></span></li><li><span><a href="#Mapping-Categories-for-CNBC" data-toc-modified-id="Mapping-Categories-for-CNBC-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Mapping Categories for CNBC</a></span></li><li><span><a href="#Mapping-Categories-for-The-New-York-Times" data-toc-modified-id="Mapping-Categories-for-The-New-York-Times-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Mapping Categories for The New York Times</a></span></li><li><span><a href="#Mapping-Categories-for-CNN" data-toc-modified-id="Mapping-Categories-for-CNN-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Mapping Categories for CNN</a></span></li><li><span><a href="#Mapping-Categories-for-People" data-toc-modified-id="Mapping-Categories-for-People-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Mapping Categories for People</a></span></li><li><span><a href="#Mapping-Categories-for-Vice" 
data-toc-modified-id="Mapping-Categories-for-Vice-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>Mapping Categories for Vice</a></span></li><li><span><a href="#Mapping-Categories-for-The-Verge" data-toc-modified-id="Mapping-Categories-for-The-Verge-2.7"><span class="toc-item-num">2.7&nbsp;&nbsp;</span>Mapping Categories for The Verge</a></span></li><li><span><a href="#Mapping-Categories-for-Fox-News" data-toc-modified-id="Mapping-Categories-for-Fox-News-2.8"><span class="toc-item-num">2.8&nbsp;&nbsp;</span>Mapping Categories for Fox News</a></span></li></ul></li><li><span><a href="#Adding-New-Category-Column-to-AllTheNews-DB" data-toc-modified-id="Adding-New-Category-Column-to-AllTheNews-DB-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Adding New <code>Category</code> Column to AllTheNews DB</a></span><ul class="toc-item"><li><span><a href="#Update-Script-for-Reuters" data-toc-modified-id="Update-Script-for-Reuters-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Update Script for Reuters</a></span></li><li><span><a href="#Update-Script-for-CNBC" data-toc-modified-id="Update-Script-for-CNBC-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Update Script for CNBC</a></span></li><li><span><a href="#Update-Script-for-The-New-York-Times" data-toc-modified-id="Update-Script-for-The-New-York-Times-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Update Script for The New York Times</a></span></li><li><span><a href="#Update-Script-for-CNN" data-toc-modified-id="Update-Script-for-CNN-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Update Script for CNN</a></span></li><li><span><a href="#Updated-Script-for-People" data-toc-modified-id="Updated-Script-for-People-3.5"><span class="toc-item-num">3.5&nbsp;&nbsp;</span>Updated Script for People</a></span></li><li><span><a href="#Updated-Script-for-Vice" data-toc-modified-id="Updated-Script-for-Vice-3.6"><span class="toc-item-num">3.6&nbsp;&nbsp;</span>Updated Script for 
Vice</a></span></li><li><span><a href="#Updated-Script-for-The-Verge" data-toc-modified-id="Updated-Script-for-The-Verge-3.7"><span class="toc-item-num">3.7&nbsp;&nbsp;</span>Updated Script for The Verge</a></span></li></ul></li></ul></div>

In [1]:
import os                              # Python default package
from urllib.parse import quote         # Python default package

import pandas as pd
from dotenv import load_dotenv         # conda install -c conda-forge python-dotenv
from sqlalchemy import create_engine   # conda install -c anaconda sqlalchemy

In [2]:
# Display settings: show up to 1000 rows and up to 400 characters per cell
# so long section lists and article titles are not truncated in the output.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", 400)

In [3]:
load_dotenv() # Load the variables of the .env file into the environment; the recorded run returned True

True

In [4]:
# Load the database settings that load_dotenv() placed in the environment
db_name = os.getenv("db_name")
db_username = os.getenv("db_username")
db_password = os.getenv("db_password")
db_table_schema = os.getenv("db_table_schema")

# Fail early with a clear message: os.getenv returns None for an unset variable,
# which would otherwise end up as the literal text "None" in the connection string.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("db_name", db_name),
        ("db_username", db_username),
        ("db_password", db_password),
        ("db_table_schema", db_table_schema),
    )
    if setting_value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(missing_settings)
    )

# "postgresql://" is the dialect name SQLAlchemy expects; the legacy "postgres://"
# alias is rejected by SQLAlchemy 1.4 and later.
# The credentials are percent-encoded so characters such as "@", ":" or "/"
# cannot be mistaken for URL delimiters.
connection_string = (
    f"postgresql://{quote(db_username, safe='')}:{quote(db_password, safe='')}"
    f"@localhost:5432/{db_name}"
)
engine = create_engine(connection_string)

In [5]:
# Show the names of the tables that exist in the configured database and schema
tables_query_template = """
SELECT * 
FROM information_schema.tables
WHERE table_catalog = '{db_name}'
AND table_schema = '{db_table_schema}';
"""
tables_query = tables_query_template.format(db_name=db_name, db_table_schema=db_table_schema)

available_tables = pd.read_sql(tables_query, con=engine)
available_tables[["table_name"]]

Unnamed: 0,table_name
0,BBCArticles
1,AllTheNews21
2,BBCSportsArticles


## How Many NULL Section for 2018-2020?

In [6]:
# Count the 2018-2020 articles that have no section assigned
null_section_query = """
SELECT COUNT(*)
FROM public."AllTheNews21"
WHERE year BETWEEN 2018 AND 2020
AND section IS NULL;
"""
null_section_count = pd.read_sql(sql=null_section_query, con=engine)
null_section_count

Unnamed: 0,count
0,424950


### If we drop the NULL Sections, how many per publication remains?

In [7]:
# Number of 2018-2020 articles per publication once rows without a section are
# excluded, largest publication first.
# GROUP BY already returns exactly one row per publication, so the former
# DISTINCT was redundant and has been removed; the result set is unchanged.
q = """
SELECT
    publication,
    COUNT(*) AS articles_count
FROM public."AllTheNews21"
WHERE year BETWEEN 2018 AND 2020
AND section IS NOT NULL
GROUP BY publication
ORDER BY articles_count DESC
"""
articles_per_publication = pd.read_sql(q, con=engine)
articles_per_publication

Unnamed: 0,publication,articles_count
0,Reuters,423460
1,CNBC,132316
2,The New York Times,123253
3,CNN,76581
4,People,68602
5,Vice,29551
6,The Verge,24198
7,Fox News,17624
8,Economist,11268
9,Gizmodo,9700


## Generalizing the Categories For Non-Null Sections

Terms to remove: 

- CORRECTED
- UPDATE
- COLUMN
- BRIEF

### Mapping Categories for Reuters

Only `sections` with `article_counts >= 50` are mapped. `sections` with fewer articles will eventually be dropped.

In [8]:
# Reuters: original section name -> generalized category.
# The dict literal holds one entry per section; list(... .items()) turns it into
# the ordered (old: Section, new: Category) tuples that the Reuters update loop
# further down in this notebook iterates over.
reuters_keep = list({
    "World News": "world",
    "Business News": "business",
    "Market News": "markets and investments",
    # NOTE(review): "global healthcare" is used only for this one section in the
    # whole notebook — confirm it is meant to be a category of its own.
    "Healthcare": "global healthcare",
    "Financials": "finance and economics",
    "Sports News": "sports",
    "Bonds News": "finance and economics",
    "Company News": "business",
    "Commodities": "markets and investments",
    "Politics": "politics",
    "Deals": "business",
    "Consumer Goods and Retail": "business",
    "Technology News": "science and technology",
    "Environment": "climate and environment",
    "Health News": "health and wellness",
    "Brexit": "politics",
    "Asia": "world",
    "Energy": "energy",
    "U.S.": "us",
    "Entertainment News": "entertainment",
    "Switzerland Market Report": "markets and investments",
    "Westlaw News": "legal and crimes",
    "Industrials": "business",
    "Basic Materials": "finance and economics",
    "Funds News": "finance and economics",
    "Davos": "finance and economics",
    "US MLB": "sports",
    "Hot Stocks": "markets and investments",
    "Japan": "world",
    "US College Basketball": "sports",
    "Sustainable Business": "business",
    "US NHL": "sports",
    "US NBA": "sports",
    "Wealth": "wealth",
    "Foreign Exchange Analysis": "finance and economics",
    "Technology, Media and Telecommunications": "science and technology",
    "Mergers & Acquisitions - Americas": "business",
    "U.S. Legal News": "legal and crimes",
    "IT Services & Consulting": "science and technology",
    "Regulatory News - Americas": "business",
    "Auto & Truck Manufacturers": "automobiles",
    "Chinese Labor Unrest": "world",
    "Lifestyle": "life-style",
    "Communications Equipment": "science and technology",
    "Earnings Season": "business",
    "US College Football": "sports",
    "Apparel & Accessories": "business",
    "Fintech": "science and technology",
    "Software": "science and technology",
    "Integrated Telecommunications Services": "science and technology",
    "Science News": "science and technology",
    "Olympics News": "sports",
    "Semiconductors": "science and technology",
    "Supreme Court": "legal and crimes",
    "Cyber Risk": "science and technology",
    "Bankruptcy News": "business",
    "Esports": "sports",
    "US NFL": "sports",
    "Gold Market Report": "markets and investments",
    "Target LGBT": "life-style",
    "Industry, Materials and Utilities": "world",
    "Financial Services and Real Estate": "markets and investments",
    "Biotechnology": "science and technology",
    "Computer Hardware": "science and technology",
    "Money Markets RSS": "markets and investments",
    "Arts": "arts",
    "India": "world",
    "Publishing": "business",
    "Internet of Things": "science and technology",
    "Entertainment Production": "entertainment",
    "Consumer Electronics": "business",
    "Private Equity": "markets and investments",
    "Beverages - Distillers & Wineries": "food",
    "Agriculture": "business",
    "Full coverage of the Winter Olympics.": "sports",
    "London Market Report": "markets and investments",
    "Beverages - Brewers": "food",
    "Hedge Funds - Americas": "markets and investments",
    "G20": "politics",
    "Advanced Medical Equipment": "science and technology",
    "India Election": "politics",
    "Office Equipment": "business",
    "News from the 2019 World Economic Forum": "finance and economics",
    "Mergers & Acquisitions - Asias": "business",
    "Myanmar": "world",
    "Regulatory News - Asias": "legal and crimes",
    "Healthcare Facilities": "business",
    "Olympics Rio": "sports",
}.items())

# Reuters sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
reuters_exclude = [
    "Big Story 10", "Cyclical Consumer Goods", "Non-Cyclical Consumer Goods",
    "Credit RSS", "Corrections News", "Breakingviews", "Broadcasting",
    "Big Story", "On The Case", "Internet News", "Special Reports", "RPB 164",
    "Oddly Enough", "Commentary", "News Now", "Big Story 12",
    "Reuters Fact Check", "Media News", "Factbox", "Rates RSS",
]

### Mapping Categories for CNBC

Only `sections` with `article_counts >= 50` are mapped. `sections` with fewer articles will eventually be dropped.

In [9]:
# CNBC: original section name -> generalized category.
# list(... .items()) yields the ordered (old: Section, new: Category) tuples
# that the CNBC update loop further down in this notebook iterates over.
cnbc_keep = list({
    "Politics": "politics",
    "Tech": "science and technology",
    "Markets": "markets and investments",
    "Investing": "markets and investments",
    "Personal Finance": "personal finance",
    "Market Insider": "markets and investments",
    "Health and Science": "health and wellness",
    "Retail": "business",
    "Autos": "automobiles",
    "Economy": "finance and economics",
    "Energy": "energy",
    "Europe News": "world",
    "Airlines": "travel and transportation",
    "World Economy": "finance and economics",
    "Business News": "business",
    "Currencies": "finance and economics",
    "World Politics": "politics",
    "Restaurants": "food",
    "Federal Reserve": "finance and economics",
    "US Markets": "markets and investments",
    "Asia Markets": "markets and investments",
    "Entertainment": "entertainment",
    "Finance": "finance and economics",
    "Bonds": "finance and economics",
    "Real Estate": "real estate",
    "China Economy": "finance and economics",
    "Food & Beverage": "food",
    "World News": "world",
    "Banks": "finance and economics",
    "Sports": "sports",
    "Europe Politics": "politics",
    "Futures & Commodities": "markets and investments",
    "Earnings": "markets and investments",
    "U.S. News": "us",
    "Asia Politics": "politics",
    "Bitcoin": "markets and investments",
    "Asia Economy": "finance and economics",
    "Weather & Natural Disasters": "climate and environment",
    "Trade": "international relations",
    "Biotech and Pharma": "science and technology",
    "Defense": "international relations",
    "Media": "entertainment",
    "Asia-Pacific News": "world",
    "Enterprise": "business",
    "Transportation": "travel and transportation",
    "Oil and Gas": "energy",
    "US Economy": "finance and economics",
    "Central Banks": "finance and economics",
    "China Politics": "politics",
    "Tech Guide": "science and technology",
    "Cybersecurity": "science and technology",
    "Oil": "energy",
    "Investing in Space": "business",
    "Europe Markets": "markets and investments",
    "Metals": "markets and investments",
    "Europe Economy": "finance and economics",
    "Aerospace & Defense": "business",
    "Wall Street": "markets and investments",
    "White House": "politics",
    "Hedge Funds": "markets and investments",
    "Cryptocurrency": "finance and economics",
    "Wealth": "wealth",
    "Travel": "travel and transportation",
    "Invest in You: Ready. Set. Grow.": "personal finance",
    "Opinion - Politics": "politics",
    "Stocks": "markets and investments",
    "Industrials": "business",
    "Brexit": "politics",
    "Elections": "politics",
    "World Markets": "markets and investments",
    "Crime": "legal and crimes",
    "China Markets": "markets and investments",
    "Money": "wealth",
    "Department Stores": "business",
    "Environment": "climate and environment",
    "Life": "life-style",
    "Apparel": "business",
    "Social Media": "business",
    "Law": "legal and crimes",
    "Shark Tank": "markets and investments",
}.items())

# CNBC sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
cnbc_exclude = [
    "Wires", "CNBC News Releases", "Morning Brief", "Trading Nation",
    "CCTV Transcripts", "Mad Money", "Deals and IPOs", "Jobs",
    "Commentary", "At Work", "CNBC Disruptor",
]

### Mapping Categories for The New York Times

Only `sections` with `article_counts >= 50` are mapped. `sections` with fewer articles will eventually be dropped.

In [10]:
# The New York Times: original section name -> generalized category.
# list(... .items()) yields the ordered (old: Section, new: Category) tuples
# that the New York Times update loop further down in this notebook iterates over.
nytimes_keep = list({
    "us": "us",
    "world": "world",
    "arts": "arts",
    "sports": "sports",
    "business": "business",
    "books": "entertainment",
    "fashion": "life-style",
    "movies": "entertainment",
    "style": "life-style",
    "dining": "food",
    "learning": "life-style",
    "technology": "science and technology",
    "theater": "entertainment",
    "realestate": "real estate",
    "travel": "travel and transportation",
    "well": "health and wellness",
    "health": "health and wellness",
    "science": "science and technology",
    "climate": "climate and environment",
    "parenting": "life-style",
    "education": "education",
    "your-money": "personal finance",
}.items())

# New York Times sections that are deliberately left unmapped; articles in
# these sections keep a NULL value in the new category column.
nytimes_exclude = [
    "opinion", "nyregion", "briefing", "obituaries", "magazine", "t-magazine",
    "crosswords", "espanol", "pageoneplus", "todayspaper", "reader-center",
    "upshot", "smarter-living", "universal", "insider", "lens",
    "neediest-cases", "admin", "sunday-review", "the-weekly", "jobs", "video",
    "homepage", "todaysinyt", "international-home", "multimedia", "podcasts",
    "automobiles",
]

### Mapping Categories for CNN

Since CNN has fewer sections, we will consider all of its article sections.

In [11]:
# Tuple: (old: Section, new: Category)
#
# Fixes compared with the previous version of this list:
# - "sport" / "app-sports-section" now map to "sports" and "homes" maps to
#   "real estate", the spellings used by the other publications' lists in this
#   notebook, so CNN rows land in the same categories.
# - "intl_business" was listed twice ("business", then "finance and economics").
#   The update loop applies entries in order, so the later entry always
#   overwrote the earlier one; only the effective entry is kept.
#   NOTE(review): confirm "finance and economics" is the intended category.
cnn_keep = [
    # Regional and world news
    ("us", "us"),
    ("asia", "world"),
    ("world", "world"),
    ("china", "world"),
    ("africa", "world"),
    ("australia", "world"),
    ("middleeast", "world"),
    ("americas", "world"),
    ("india", "world"),
    ("europe", "world"),
    ("uk", "world"),
    ("app-international-edition", "world"),
    # Business and entertainment
    ("business", "business"),
    ("entertainment", "entertainment"),
    ("celebrities", "entertainment"),
    ("tv-shows", "entertainment"),
    ("movies", "entertainment"),
    # Technology, health, environment, cars
    ("tech", "science and technology"),
    ("health", "health and wellness"),
    ("weather", "climate and environment"),
    ("cars", "automobiles"),
    # Economy and markets
    ("economy", "finance and economics"),
    ("intl_business", "finance and economics"),
    ("investing", "markets and investments"),
    # Politics
    ("politics", "politics"),
    ("_politics-zone-injection", "politics"),
    ("app-politics-section", "politics"),
    # Everything else
    ("energy", "energy"),
    ("culture", "life-style"),
    ("living", "life-style"),
    ("sport", "sports"),
    ("app-sports-section", "sports"),
    ("homes", "real estate")
]

# CNN sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
cnn_exclude = [
    "opinions", "media", "app-news-section", "success", "cnn10",
    "perspectives", "vr", "cnn-info", "homepage2",
]

### Mapping Categories for People

Since People has fewer sections, we will consider all of its article sections.

In [29]:
# People: original section name -> generalized category.
# list(... .items()) yields the ordered (old: Section, new: Category) tuples
# that the People update loop further down in this notebook iterates over.
# NOTE(review): this list uses "arts and entertainment" and "life", while the
# Reuters/CNBC/New York Times/CNN lists use "entertainment"/"arts" and
# "life-style" — confirm whether these categories should be unified.
people_keep = list({
    "politics": "politics",
    "tech": "science and technology",
    "sports": "sports",
    "tv": "arts and entertainment",
    "movies": "arts and entertainment",
    "music": "arts and entertainment",
    "country": "arts and entertainment",
    "theater": "arts and entertainment",
    "books": "arts and entertainment",
    "awards": "arts and entertainment",
    "people-tv-ew": "arts and entertainment",
    "entertainment": "arts and entertainment",
    "health": "health and wellness",
    "bodies": "health and wellness",
    "parents": "life",
    "pets": "life",
    "style": "life",
    "human-interest": "life",
    "royals": "life",
    "celebrity": "life",
    "babies": "life",
    "lifestyle": "life",
    "social-media-stars": "life",
    "fashion": "life",
    "crime": "legal and crimes",
    "food": "food",
    "travel": "travel and transportation",
    "home": "real estate",
    "real-estate": "real estate",
}.items())

# People sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
people_exclude = [
    "chica", "beauty", "archive",
    "breaking-news", "premium", "uncategorized",
]

### Mapping Categories for Vice

Only `sections` with `article_counts >= 30` are mapped. `sections` with fewer articles will eventually be dropped.

In [48]:
# Tuple: (old: Section, new: Category)
#
# The ("Health", "health and wellness") pair used to appear twice; the duplicate
# was removed so each section is listed (and updated) exactly once.
# NOTE(review): this list uses "arts and entertainment" and "life", while the
# Reuters/CNBC/New York Times/CNN lists use "entertainment"/"arts" and
# "life-style" — confirm whether these categories should be unified.
vice_keep = [
    ("Entertainment", "arts and entertainment"),
    ("Music by VICE", "arts and entertainment"),
    ("Games", "arts and entertainment"),
    ("Environment", "climate and environment"),
    ("Food by VICE", "food"),
    ("Health", "health and wellness"),
    ("Drugs", "legal and crimes"),
    ("crime", "legal and crimes"),
    ("true crime", "legal and crimes"),
    ("Identity", "life"),
    ("Sex", "life"),
    ("Life", "life"),
    ("Money", "personal finance"),
    ("Views My Own", "politics"),
    ("politics", "politics"),
    ("Rise Up", "politics"),
    ("immigration", "politics"),
    ("Tech by VICE", "science and technology"),
    ("Sports", "sports"),
    ("Travel", "travel and transportation")
]

# Vice sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
vice_exclude = [
    "News by VICE", "Noisey", "Astro Guide", "The VICE Guide to Right Now",
    "VICE News", "Comics!", "The VICE Guide to Right Now Podcast",
    "Motherboard", "News", "Desus & Mero", "Voices",
    "The VICE Morning Bulletin",
]

### Mapping Categories for The Verge

Only `sections` with `article_counts >= 30` are mapped. `sections` with fewer articles will eventually be dropped.

In [79]:
# The Verge: original section name -> generalized category.
# list(... .items()) yields the ordered (old: Section, new: Category) tuples
# that the Verge update loop further down in this notebook iterates over.
# NOTE(review): this list uses "arts and entertainment" and "life", while the
# Reuters/CNBC/New York Times/CNN lists use "entertainment"/"arts" and
# "life-style" — confirm whether these categories should be unified.
the_verge_keep = list({
    "Gaming": "arts and entertainment",
    "Movie Review": "arts and entertainment",
    "Book Review": "arts and entertainment",
    "Art Club": "arts and entertainment",
    "New Adventures": "arts and entertainment",
    "Entertainment": "arts and entertainment",
    "YouTube": "arts and entertainment",
    "Apps": "business",
    "Web": "business",
    "Business": "business",
    "Design": "business",
    "Policy": "legal and crimes",
    "Good Deals": "life",
    "Culture": "life",
    "Tech": "science and technology",
    "Google": "science and technology",
    "Apple": "science and technology",
    "Science": "science and technology",
    "Microsoft": "science and technology",
    "Mobile": "science and technology",
    "Smart Home": "science and technology",
    "Reviews": "science and technology",
    "Photography": "science and technology",
    "Hands-on": "science and technology",
    "Circuit Breaker": "science and technology",
    "Transportation": "travel and transportation",
    "US & World": "world",
}.items())

# Verge sections that are deliberately left unmapped; articles in these
# sections keep a NULL value in the new category column.
the_verge_exclude = [
    "TL;DR", "Podcasts", "Report", "How-to", "Watch This",
    "StoryStream", "Featured Videos", "Interview", "Features", "Editorial",
]

### Mapping Categories for Fox News

In [78]:
# # For testing purposes
# Print up to 50 article titles from a single section of one publication, to
# help judge which generalized category that section belongs to.
sample_titles_query = """
SELECT *
FROM public."AllTheNews21"
WHERE year BETWEEN 2018 AND 2020
AND section = 'Art Club'
AND publication = 'The Verge'
LIMIT 50
"""
sample_articles = pd.read_sql(sample_titles_query, con=engine)
for article_title in sample_articles["title"]:
    print(article_title)

These sleek animations highlight the relationship between designers and clients
Beeple on creating a zombie Mark Zuckerberg and flesh-eating Baby Yoda to examine the times
How Nickelodeon’s SpongeBob meme toys were birthed by the internet
Inside the Oh Sees’ year-long effort to put their albums on 8-track box sets
The artist putting tiny Kirby, waffles, and SpongeBob on your mechanical keyboard
Julien Rivoire’s 3D concepts imagine modern-day versions of retro tech
Tom Hegen’s aerial photography captures the human impact on natural landscapes
How Team Rolfes uses motion capture suits to create wild interactive experiences
How Blake Kathryn pulls futuristic 3D dreamscapes from her subconscious
George Mager’s bouncy, multilingual world uses script as design
Making weird go viral with Hi Stranger creator Kirsten Lepore
Photographer Richard Parry shows the innovation of consumer tech from the inside out 
How Ommy Akhe makes her ultra-cool Instagram AR filters
Community of Microbes: augmente

In [50]:
# Number of 2018-2020 articles per section for a single publication, largest
# section first. Used to decide which sections are worth mapping to a category.
# GROUP BY already returns exactly one row per section, so the former DISTINCT
# was redundant and has been removed; the result set is unchanged.
q = """
SELECT
    section,
    COUNT(*) AS articles_count
FROM public."AllTheNews21"
WHERE year BETWEEN 2018 AND 2020
AND section IS NOT NULL
AND publication = 'The Verge'
GROUP BY section
ORDER BY articles_count DESC
"""
articles_per_section = pd.read_sql(q, con=engine)
# Number of distinct sections found for the publication
print(len(articles_per_section))
articles_per_section

101


Unnamed: 0,section,articles_count
0,Tech,3284
1,Google,2314
2,Gaming,2303
3,Apple,2020
4,Entertainment,1952
5,Policy,1650
6,Science,1424
7,Microsoft,1095
8,Mobile,981
9,Apps,962


## Adding New `Category` Column to AllTheNews DB

First, create a new `category` column on `AllTheNews21`. 

**Only run this once in pgAdmin**

```sql
-- Create a new "category" column
ALTER TABLE public."AllTheNews21"
ADD COLUMN category TEXT;
```

Next, run the update script. (**Only run once, unless rebuilding the DB**)

### Update Script for Reuters

In [14]:
# # Populate the "category" column for Reuters
# for (old_section, new_category) in reuters_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'Reuters'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Update Script for CNBC

In [15]:
# # Populate the "category" column for CNBC
# for (old_section, new_category) in cnbc_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'CNBC'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Update Script for The New York Times

In [16]:
# # Populate the "category" column for The New York Times
# for (old_section, new_category) in nytimes_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'The New York Times'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Update Script for CNN

In [17]:
# # Populate the "category" column for CNN
# for (old_section, new_category) in cnn_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'CNN'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Updated Script for People

In [30]:
# # Populate the "category" column for People
# for (old_section, new_category) in people_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'People'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Updated Script for Vice

In [49]:
# # Populate the "category" column for Vice
# for (old_section, new_category) in vice_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'Vice'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)

### Updated Script for The Verge

In [81]:
# # Populate the "category" column for The Verge
# for (old_section, new_category) in the_verge_keep:
    
#     q = f"""
#     UPDATE public."AllTheNews21"
#     SET category = '{new_category}'
#     WHERE publication = 'The Verge'
#     AND section = '{old_section}'
#     """
    
#     engine.execute(q)