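# Read tables

Load each configured subreddit's cleaned Delta table from S3, register it as a temporary view named `reddit_<subreddit>`, and inspect it with Spark SQL: first the number of posts per day, then the rows themselves ordered by `created_utc`.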

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from reddit.reddit_streaming import *
import datetime as dt
import pprint
pp = pprint.PrettyPrinter(indent = 1)
%load_ext sparksql_magic

# --- configuration ----------------------------------------------------------
# read_files() is provided by the wildcard import of reddit.reddit_streaming;
# it yields a (creds, config) pair that is indexed by key below.
creds, config = read_files()
subreddit_list = config["subreddit"]
kafka_host = config["kafka_host"]  # not used in this cell; kept in the namespace as before
spark_host = config["spark_host"]
aws_client = creds["aws-client"]
aws_secret = creds["aws-secret"]

# Fail early with a clear message: without at least one subreddit there is no
# session to build and every query cell further down would fail anyway.
if not subreddit_list:
    raise ValueError('config["subreddit"] is empty: there are no tables to load.')


def build_spark_session(tag, master_host, access_key, secret_key):
    """Create (or fetch) the Spark session used by every query in this notebook.

    Parameters
    ----------
    tag : str
        Label inserted into the application name and the scratch directories.
    master_host : str
        Host name of the Spark standalone master (port 7077 is appended).
    access_key, secret_key : str
        AWS credentials handed to the s3a filesystem.

    Returns
    -------
    SparkSession
        The session returned by ``getOrCreate()``. If a session is already
        running in this kernel, that one is returned and the application-level
        settings below (name, master, scratch dirs, jars) do not start a new one.
    """
    return (
        SparkSession.builder.appName("reddit_" + tag + "_read_data")
        .master("spark://{}:7077".format(master_host))
        .config("spark.scheduler.mode", "FAIR")
        .config("spark.scheduler.allocation.file", "file:///opt/workspace/redditStreaming/fairscheduler.xml")
        .config("spark.executor.memory", "1024m")
        .config("spark.executor.cores", "2")
        .config("spark.streaming.concurrentJobs", "4")
        .config("spark.local.dir", "/opt/workspace/tmp/driver/{}/".format(tag))
        .config("spark.worker.dir", "/opt/workspace/tmp/executor/{}/".format(tag))
        .config("spark.sql.debug.maxToStringFields", 1000)
        .config("spark.eventLog.enabled", "true")
        .config("spark.eventLog.dir", "file:///opt/workspace/events")
        .config("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.2.0,org.apache.hadoop:hadoop-common:3.3.1,org.apache.hadoop:hadoop-aws:3.3.1,org.apache.hadoop:hadoop-client:3.3.1,io.delta:delta-core_2.12:1.2.1")
        .config("spark.hadoop.fs.s3a.access.key", access_key)
        .config("spark.hadoop.fs.s3a.secret.key", secret_key)
        .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
        .config('spark.hadoop.fs.s3a.aws.credentials.provider', 'org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider')
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore")
        .enableHiveSupport()
        .getOrCreate()
    )


# --- session ----------------------------------------------------------------
# The builder used to sit inside the loop below. Only the first getOrCreate()
# call can start an application, so the per-subreddit names and directories of
# later iterations never took effect. Build the session once, tagged with the
# first subreddit, which is what the looped version effectively ran with.
spark = build_spark_session(subreddit_list[0], spark_host, aws_client, aws_secret)
print("created spark successfully.")

# --- tables -----------------------------------------------------------------
# One temp view per subreddit, each backed by that subreddit's Delta table.
# The former .option("header", True) was dropped: it is a CSV reader option
# and has no meaning for the Delta source.
for subreddit in subreddit_list:
    df = spark.read.format("delta").load("s3a://reddit-stevenhurwitt/" + subreddit + "_clean/")

    df.createOrReplaceTempView("reddit_{}".format(subreddit))
    print("created table reddit_{}.".format(subreddit))


:: loading settings :: url = jar:file:/usr/local/lib/python3.7/dist-packages/pyspark/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
org.apache.hadoop#hadoop-common added as a dependency
org.apache.hadoop#hadoop-aws added as a dependency
org.apache.hadoop#hadoop-client added as a dependency
io.delta#delta-core_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-eaa00a86-932d-4cf4-8b10-c74c4eaa5b3d;1.0
	confs: [default]
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.2.0 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.2.0 in central
	found org.apache.kafka#kafka-clients;2.8.0 in central
	found org.lz4#lz4-java;1.7.1 in central
	found org.xerial.snappy#snappy-java;1.1.8.4 in central
	found org.slf4j#slf4j-api;1.7.30 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.1 in central
	found org.spark-project.spark#unused;1.0.0 in central
	found org.apache.hadoop#hadoop-client-api;3.

created spark successfully.


22/06/17 18:47:50 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
                                                                                

created table reddit_technology.
created spark successfully.


                                                                                

created table reddit_ProgrammerHumor.
created spark successfully.


## technology

In [2]:
%%sparksql

-- Number of r/technology posts per calendar day, earliest day first.
SELECT
    CAST(created_utc AS DATE),
    COUNT(*)
FROM reddit_technology
GROUP BY CAST(created_utc AS DATE)
ORDER BY CAST(created_utc AS DATE)

                                                                                

0,1
created_utc,count(1)
2022-05-11,3
2022-05-12,33
2022-05-13,27
2022-05-16,15
2022-06-14,44
2022-06-15,42
2022-06-16,42
2022-06-17,5


In [3]:
%%sparksql

-- Every column of the r/technology view, earliest created_utc first.
SELECT *
FROM reddit_technology
ORDER BY created_utc



only showing top 20 row(s)


                                                                                

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98
approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,top_awarded_type,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,ups,total_awards_received,thumbnail_width,author_flair_template_id,is_original_content,secure_media,is_reddit_media_domain,is_meta,category,link_flair_text,can_mod_post,score,approved_by,is_created_from_ads_ui,author_premium,thumbnail,edited,author_flair_css_class,post_hint,content_categories,is_self,subreddit_type,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,url_overridden_by_dest,view_count,archived,no_follow,is_crosspostable,pinned,over_18,media_only,link_flair_template_id,can_gild,spoiler,locked,author_flair_text,visited,removed_by,mod_note,distinguished,subreddit_id,author_is_blocked,mod_reason_by,num_reports,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
,technology,,t2_8pgkdmnc,False,,0,False,Tech is hitting the brakes on hiring even as other industries keep adding jobs,r/technology,False,6,general,0,93,,True,t3_unm15z,False,dark,1.0,,3,0,140,,False,,False,False,,Business,False,3,,False,False,https://a.thumbs.redditmedia.com/0YC2GoTdt-YfMeBaTlYL2FNrrNVCUUSfaPFXbgnN4Q4.jpg,False,,link,,False,public,2022-05-11 22:38:56,text,6,,,text,cnbc.com,False,,,,,https://www.cnbc.com/amp/2022/05/11/tech-layoffs-hiring-slowdown-stand-out-in-red-hot-job-market.html,,False,False,True,False,False,False,49cac61c-a816-11e9-be34-0ebbab5890a0,True,False,False,,False,,,,t5_2qh16,False,,,,,unm15z,True,,saurin212,,1,True,all_ads,False,False,,/r/technology/comments/unm15z/tech_is_hitting_the_brakes_on_hiring_even_as/,all_ads,False,https://www.cnbc.com/amp/2022/05/11/tech-layoffs-hiring-slowdown-stand-out-in-red-hot-job-market.html,11961363,2022-05-11 22:38:56,0,,False
,technology,,t2_50rzo,False,,0,False,"Appeals court lifts stay, allows TX social media law to go into effect",r/technology,False,6,general,0,70,,True,t3_unm8vy,False,dark,1.0,,3,0,140,,False,,False,False,,Politics,False,3,,False,False,https://b.thumbs.redditmedia.com/nQHl8WXhGBisDJXlmDGVXpNRldbpWMH1xldfflbxWAc.jpg,False,,link,,False,public,2022-05-11 22:49:36,text,6,,,text,protocol.com,False,,,,,https://www.protocol.com/bulletins/texas-bias-law-proceeds,,False,True,True,False,False,False,70f05306-a816-11e9-b261-0e41843d240e,True,False,False,,False,,,,t5_2qh16,False,,,,,unm8vy,True,,macnbc,,0,True,all_ads,False,False,,/r/technology/comments/unm8vy/appeals_court_lifts_stay_allows_tx_social_media/,all_ads,False,https://www.protocol.com/bulletins/texas-bias-law-proceeds,11961400,2022-05-11 22:49:36,0,,False
,technology,,t2_5w9r3,False,,0,False,Court lets Texas restrictions on social platform content moderation take effect,r/technology,False,6,general,0,73,,True,t3_unmsdt,False,dark,1.0,,2,0,140,,False,,False,False,,Social Media,False,2,,False,True,https://b.thumbs.redditmedia.com/ncGR-bgoKIUbuE-gMQY2lAODE5fyDOww-ZF78xEBk0s.jpg,False,,link,,False,public,2022-05-11 23:17:20,text,6,,,text,theverge.com,False,,,,,https://www.theverge.com/2022/5/11/23067002/texas-netchoice-paxton-hb20-social-media-law-fifth-circuit-appeals-court-grant-stay-ruling,,False,False,True,False,False,False,7d4d8376-a816-11e9-a92d-0e6b9fa95170,True,False,False,,False,,,,t5_2qh16,False,,,,,unmsdt,True,,habichuelacondulce,,0,True,all_ads,False,False,,/r/technology/comments/unmsdt/court_lets_texas_restrictions_on_social_platform/,all_ads,False,https://www.theverge.com/2022/5/11/23067002/texas-netchoice-paxton-hb20-social-media-law-fifth-circuit-appeals-court-grant-stay-ruling,11961514,2022-05-11 23:17:20,0,,False
,technology,,t2_c92d05jm,False,,0,False,"Google Revamps Search, Maps Features for Younger, TikTok-Savvy Users",r/technology,False,6,general,0,70,,True,t3_uo08gz,False,dark,1.0,,2,0,140,,False,,False,False,,Social Media,False,2,,False,True,https://b.thumbs.redditmedia.com/652EQGu3myUQwgNU6XGiIApm3ikZFsdueA7x_vcOSvc.jpg,False,,link,,False,public,2022-05-12 12:45:52,text,6,,,text,businessinsider.com,False,,,,,https://www.businessinsider.com/google-revamps-search-maps-features-younger-tik-tok-savvy-users-2022-5,,False,False,True,False,False,False,7d4d8376-a816-11e9-a92d-0e6b9fa95170,True,False,False,,False,,,,t5_2qh16,False,,,,,uo08gz,True,,HLMenckenFan,,1,True,all_ads,False,False,,/r/technology/comments/uo08gz/google_revamps_search_maps_features_for_younger/,all_ads,False,https://www.businessinsider.com/google-revamps-search-maps-features-younger-tik-tok-savvy-users-2022-5,11964987,2022-05-12 12:45:52,0,,False
,technology,,t2_4layskvl,False,,0,False,Tesla CEO Elon Musk dismisses hydrogen as tool for energy storage,r/technology,False,6,general,0,78,,True,t3_uo0fkj,False,dark,0.33000001311302185,,0,0,140,,False,,False,False,,Energy,False,0,,False,True,https://b.thumbs.redditmedia.com/rHLq9sbrbnxCEvDW5PLJMBMBrqEsOuq-TlFlSeLtMic.jpg,False,,link,,False,public,2022-05-12 12:56:32,text,6,,,text,cnbc.com,False,,,,,https://www.cnbc.com/2022/05/12/tesla-ceo-elon-musk-dismisses-hydrogen-as-tool-for-energy-storage.html#Echobox=1652353065,,False,True,True,False,False,False,4f78ed5a-a816-11e9-85c4-0e6d89cfa908,True,False,False,,False,,,,t5_2qh16,False,,,,,uo0fkj,True,,Vercitti,,0,True,all_ads,False,False,,/r/technology/comments/uo0fkj/tesla_ceo_elon_musk_dismisses_hydrogen_as_tool/,all_ads,False,https://www.cnbc.com/2022/05/12/tesla-ceo-elon-musk-dismisses-hydrogen-as-tool-for-energy-storage.html#Echobox=1652353065,11965022,2022-05-12 12:56:32,0,,False
,technology,,t2_mvs2r6w,False,,0,False,Even Microsoft thinks you should uninstall the latest Windows 11 update,r/technology,False,6,general,0,78,,True,t3_uo0kjc,False,dark,1.0,,1,0,140,,False,,False,False,,Software,False,1,,False,False,https://b.thumbs.redditmedia.com/otzy0NAOZEas7GudkDPXhONDFfyaxtniez1e7G3Vt-g.jpg,False,,link,,False,public,2022-05-12 13:02:56,text,6,,,text,techradar.com,False,,,,,https://www.techradar.com/news/even-microsoft-thinks-you-should-uninstall-the-latest-windows-11-update,,False,True,True,False,False,False,85c4d73e-a816-11e9-95d5-0ee6b96491cc,True,False,False,,False,,,,t5_2qh16,False,,,,,uo0kjc,True,,WPHero,,0,True,all_ads,False,False,,/r/technology/comments/uo0kjc/even_microsoft_thinks_you_should_uninstall_the/,all_ads,False,https://www.techradar.com/news/even-microsoft-thinks-you-should-uninstall-the-latest-windows-11-update,11965069,2022-05-12 13:02:56,0,,False
,technology,,t2_c92d05jm,False,,0,False,Running Twitter may be much harder than Elon Musk thinks,r/technology,False,6,general,0,93,,True,t3_uo0oik,False,dark,1.0,,1,0,140,,False,,False,False,,Social Media,False,1,,False,True,https://a.thumbs.redditmedia.com/_qbWttzCtSZY8ZeO9i4xlHEo__sDT8euFaoegTvONN0.jpg,False,,link,,False,public,2022-05-12 13:07:12,text,6,,,text,apnews.com,False,,,,,https://apnews.com/article/elon-musk-technology-donald-trump-cc6e43c5807fe1823701cf4b39012c3e,,False,True,True,False,False,False,7d4d8376-a816-11e9-a92d-0e6b9fa95170,True,False,False,,False,,,,t5_2qh16,False,,,,,uo0oik,True,,HLMenckenFan,,0,True,all_ads,False,False,,/r/technology/comments/uo0oik/running_twitter_may_be_much_harder_than_elon_musk/,all_ads,False,https://apnews.com/article/elon-musk-technology-donald-trump-cc6e43c5807fe1823701cf4b39012c3e,11965093,2022-05-12 13:07:12,0,,False
,technology,,t2_guf36,False,,0,False,California regulators set to vote on desalination plant,r/technology,False,6,general,0,73,,True,t3_uo0pyg,False,dark,1.0,,1,0,140,,False,,False,False,,Business,False,1,,False,True,https://b.thumbs.redditmedia.com/S7HjBssounQQwNNJPk8zgSS2UN88b9LQZT7rEb_OmzQ.jpg,False,,link,,False,public,2022-05-12 13:09:20,text,6,,,text,reuters.com,False,,,,,https://www.reuters.com/world/us/california-regulators-set-vote-desalination-plant-2022-05-12/?rpc=401&amp;,,False,True,True,False,False,False,49cac61c-a816-11e9-be34-0ebbab5890a0,True,False,False,,False,,,,t5_2qh16,False,,,,,uo0pyg,True,,Wagamaga,,0,True,all_ads,False,False,,/r/technology/comments/uo0pyg/california_regulators_set_to_vote_on_desalination/,all_ads,False,https://www.reuters.com/world/us/california-regulators-set-vote-desalination-plant-2022-05-12/?rpc=401&amp;,11965107,2022-05-12 13:09:20,0,,False
,technology,,t2_7ccf,False,,0,False,Royal Mail drone fleet takes to the air for remote Scottish island deliveries - A fleet of hi-tech drones will be deployed to the skies over the Scotland to ensure the mail gets to islanders on time.,r/technology,False,6,general,0,93,,True,t3_uo0r18,False,dark,1.0,,3,0,140,,False,,False,False,,Robotics/Automation,False,3,,False,True,https://b.thumbs.redditmedia.com/XNFuHinae5U7M9HgZgzG-t2iIgL7Y660dNB6jA7Rztc.jpg,False,,link,,False,public,2022-05-12 13:11:28,text,6,,,text,edinburghnews.scotsman.com,False,,,,,https://www.edinburghnews.scotsman.com/news/people/royal-mail-drone-fleet-takes-to-the-air-for-remote-scottish-island-deliveries-3689487,,False,False,True,False,False,False,776c82c2-a816-11e9-8179-0ec6fe24d256,True,False,False,,False,,,,t5_2qh16,False,,,,,uo0r18,True,,speckz,,0,True,all_ads,False,False,,/r/technology/comments/uo0r18/royal_mail_drone_fleet_takes_to_the_air_for/,all_ads,False,https://www.edinburghnews.scotsman.com/news/people/royal-mail-drone-fleet-takes-to-the-air-for-remote-scottish-island-deliveries-3689487,11965116,2022-05-12 13:11:28,0,,False


## news

In [4]:
%%sparksql

-- Number of r/news posts per calendar day, earliest day first.
SELECT
    CAST(created_utc AS DATE),
    COUNT(*)
FROM reddit_news
GROUP BY CAST(created_utc AS DATE)
ORDER BY CAST(created_utc AS DATE)

                                                                                

0,1
created_utc,count(1)
2022-06-17,4


In [5]:
%%sparksql

-- Every column of the r/news view, earliest created_utc first.
SELECT *
FROM reddit_news
ORDER BY created_utc

                                                                                

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98
approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,top_awarded_type,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,ups,total_awards_received,thumbnail_width,author_flair_template_id,is_original_content,secure_media,is_reddit_media_domain,is_meta,category,link_flair_text,can_mod_post,score,approved_by,is_created_from_ads_ui,author_premium,thumbnail,edited,author_flair_css_class,post_hint,content_categories,is_self,subreddit_type,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,url_overridden_by_dest,view_count,archived,no_follow,is_crosspostable,pinned,over_18,media_only,link_flair_template_id,can_gild,spoiler,locked,author_flair_text,visited,removed_by,mod_note,distinguished,subreddit_id,author_is_blocked,mod_reason_by,num_reports,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
,news,,t2_bs1la4xg,False,,0,False,"Mortgage rates hit 5.78%, the biggest weekly jump since 1987",r/news,False,6,,0,,,True,t3_ved5yu,False,dark,0.9599999785423279,,54,0,,,False,,False,False,,,False,54,,False,False,,False,,,,False,public,2022-06-17 13:26:24,text,6,,,text,cnn.com,False,,,,,https://www.cnn.com/2022/06/16/homes/mortgage-rates-june-16/index.html,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh3l,False,,,,,ved5yu,True,,Dizzy-Noise-583,,16,True,all_ads,False,False,,/r/news/comments/ved5yu/mortgage_rates_hit_578_the_biggest_weekly_jump/,all_ads,False,https://www.cnn.com/2022/06/16/homes/mortgage-rates-june-16/index.html,24820150,2022-06-17 13:26:24,0,,False
,news,,t2_tlqk8,False,,0,False,"West Memphis police investigating Friday shooting incident, 2-year-old dies from injuries",r/news,False,6,,0,,,True,t3_vee3iw,False,dark,1.0,,4,0,,,False,,False,False,,,False,4,,False,False,,False,,,,False,public,2022-06-17 14:11:12,text,6,,,text,katv.com,False,,,,,https://katv.com/news/local/west-memphis-police-investigating-friday-shooting-incident-2-year-old-dies-from-injury-baptist-memorial-hospital-crittenden-crittenden-county-crime-stoppers-crime-scene-interview-witnesses-arkansas-news,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh3l,False,,,,,vee3iw,True,,Dillatrack,,0,True,all_ads,False,False,,/r/news/comments/vee3iw/west_memphis_police_investigating_friday_shooting/,all_ads,False,https://katv.com/news/local/west-memphis-police-investigating-friday-shooting-incident-2-year-old-dies-from-injury-baptist-memorial-hospital-crittenden-crittenden-county-crime-stoppers-crime-scene-interview-witnesses-arkansas-news,24820176,2022-06-17 14:11:12,0,,False
,news,,t2_3qwsyobo,False,,0,False,‘Unacceptable’: Russian warship accused of violating Danish waters,r/news,False,6,,0,,,True,t3_veebzd,False,dark,1.0,,3,0,,,False,,False,False,,,False,3,,False,False,,False,,,,False,public,2022-06-17 14:21:52,text,6,,,text,theguardian.com,False,,,,,https://www.theguardian.com/world/2022/jun/17/unacceptable-russian-warship-accused-of-violating-danish-waters,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh3l,False,,,,,veebzd,True,,Bangemkikkoks,,0,True,all_ads,False,False,,/r/news/comments/veebzd/unacceptable_russian_warship_accused_of_violating/,all_ads,False,https://www.theguardian.com/world/2022/jun/17/unacceptable-russian-warship-accused-of-violating-danish-waters,24820201,2022-06-17 14:21:52,0,,False
,news,,t2_3qwsyobo,False,,0,False,‘Unacceptable’: Russian warship accused of violating Danish waters,r/news,False,6,,0,,,True,t3_veebzd,False,dark,0.8600000143051147,,10,0,,,False,,False,False,,,False,10,,False,False,,False,,,,False,public,2022-06-17 14:21:52,text,6,,,text,theguardian.com,False,,,,,https://www.theguardian.com/world/2022/jun/17/unacceptable-russian-warship-accused-of-violating-danish-waters,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh3l,False,,,,,veebzd,True,,Bangemkikkoks,,2,True,all_ads,False,False,,/r/news/comments/veebzd/unacceptable_russian_warship_accused_of_violating/,all_ads,False,https://www.theguardian.com/world/2022/jun/17/unacceptable-russian-warship-accused-of-violating-danish-waters,24820232,2022-06-17 14:21:52,0,,False


## worldnews

In [6]:
%%sparksql

-- Number of r/worldnews posts per calendar day, earliest day first.
SELECT
    CAST(created_utc AS DATE),
    COUNT(*)
FROM reddit_worldnews
GROUP BY CAST(created_utc AS DATE)
ORDER BY CAST(created_utc AS DATE)

                                                                                

0,1
created_utc,count(1)
2022-06-17,8


In [7]:
%%sparksql

-- Every column of the r/worldnews view, earliest created_utc first.
SELECT *
FROM reddit_worldnews
ORDER BY created_utc

                                                                                

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98
approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,top_awarded_type,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,ups,total_awards_received,thumbnail_width,author_flair_template_id,is_original_content,secure_media,is_reddit_media_domain,is_meta,category,link_flair_text,can_mod_post,score,approved_by,is_created_from_ads_ui,author_premium,thumbnail,edited,author_flair_css_class,post_hint,content_categories,is_self,subreddit_type,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,url_overridden_by_dest,view_count,archived,no_follow,is_crosspostable,pinned,over_18,media_only,link_flair_template_id,can_gild,spoiler,locked,author_flair_text,visited,removed_by,mod_note,distinguished,subreddit_id,author_is_blocked,mod_reason_by,num_reports,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
,worldnews,,t2_yl06t,False,,0,False,European Commission recommends granting Ukraine EU candidate status,r/worldnews,False,6,russia,0,,,True,t3_vednyc,False,,1.0,,9,0,,,False,,False,False,,Russia/Ukraine,False,9,,False,True,,False,,,,False,public,2022-06-17 13:52:00,richtext,6,,,text,ukrinform.net,False,,,,,https://www.ukrinform.net/rubric-polytics/3508964-european-commission-recommends-granting-ukraine-eu-candidate-status.html,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,vednyc,True,,Ekaton,,2,True,all_ads,False,False,,/r/worldnews/comments/vednyc/european_commission_recommends_granting_ukraine/,all_ads,False,https://www.ukrinform.net/rubric-polytics/3508964-european-commission-recommends-granting-ukraine-eu-candidate-status.html,29034211,2022-06-17 13:52:00,0,,False
,worldnews,,t2_bmncpj69,False,,0,False,China blocks joint India-US proposal to blacklist Pakistani terrorist Abdul Rehman Makki under UNSC sanctions,r/worldnews,False,6,,0,,,True,t3_vedzuc,False,dark,1.0,,4,0,,,False,,False,False,,,False,4,,False,False,,False,,,,False,public,2022-06-17 14:06:56,text,6,,,text,tribuneindia.com,False,,,,,https://www.tribuneindia.com/news/nation/china-blocks-joint-india-us-proposal-to-blacklist-pakistani-terrorist-abdul-rehman-makki-under-unsc-sanctions-404681,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,vedzuc,True,,Winterisbucky,,0,True,all_ads,False,False,,/r/worldnews/comments/vedzuc/china_blocks_joint_indiaus_proposal_to_blacklist/,all_ads,False,https://www.tribuneindia.com/news/nation/china-blocks-joint-india-us-proposal-to-blacklist-pakistani-terrorist-abdul-rehman-makki-under-unsc-sanctions-404681,29034225,2022-06-17 14:06:56,0,,False
,worldnews,,t2_3z7n2wc1,False,,0,False,UK: 50 new monkeypox cases in biggest epidemic beyond Africa,r/worldnews,False,6,,0,,,True,t3_vee2pe,False,dark,1.0,,3,0,,,False,,False,False,,,False,3,,False,True,,False,,,,False,public,2022-06-17 14:11:12,text,6,,,text,thehill.com,False,,,,,https://thehill.com/homenews/ap/ap-health/uk-50-new-monkeypox-cases-in-biggest-epidemic-beyond-africa/,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,vee2pe,True,,Arpith2019,,0,True,all_ads,False,False,,/r/worldnews/comments/vee2pe/uk_50_new_monkeypox_cases_in_biggest_epidemic/,all_ads,False,https://thehill.com/homenews/ap/ap-health/uk-50-new-monkeypox-cases-in-biggest-epidemic-beyond-africa/,29034236,2022-06-17 14:11:12,0,,False
,worldnews,,t2_4layskvl,False,,0,False,Agnipath: One dead in violent India protests over military hiring,r/worldnews,False,6,,0,,,True,t3_vee6r3,False,dark,1.0,,1,0,,,False,,False,False,,,False,1,,False,True,,False,,,,False,public,2022-06-17 14:15:28,text,6,,,text,bbc.co.uk,False,,,,,https://www.bbc.co.uk/news/world-asia-india-61841268,,False,True,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,vee6r3,True,,Vercitti,,0,True,all_ads,False,False,,/r/worldnews/comments/vee6r3/agnipath_one_dead_in_violent_india_protests_over/,all_ads,False,https://www.bbc.co.uk/news/world-asia-india-61841268,29034252,2022-06-17 14:15:28,0,,False
,worldnews,,t2_xs9chqv,False,,0,False,Netherlands now voicing support for Ukraine's EU candidacy,r/worldnews,False,6,russia,0,,,True,t3_veebtj,False,dark,1.0,,6,0,,,False,,False,False,,Russia/Ukraine,False,6,,False,False,,False,,,,False,public,2022-06-17 14:21:52,richtext,6,,,text,nltimes.nl,False,,,,,https://nltimes.nl/2022/06/17/netherlands-now-voicing-support-ukraines-eu-candidacy,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,veebtj,True,,MC_Transparent,,0,True,all_ads,False,False,,/r/worldnews/comments/veebtj/netherlands_now_voicing_support_for_ukraines_eu/,all_ads,False,https://nltimes.nl/2022/06/17/netherlands-now-voicing-support-ukraines-eu-candidacy,29034277,2022-06-17 14:21:52,0,,False
,worldnews,,t2_33cw9m,False,,0,False,"Russia implores Ukraine to ""stop senseless resistance"" in Severodonetsk",r/worldnews,False,6,russia,0,,,True,t3_veekau,False,dark,1.0,,1,0,,,False,,False,False,,Russia/Ukraine,False,1,,False,False,,False,,,,False,public,2022-06-17 14:32:32,richtext,6,,,text,newsweek.com,False,,,,,https://www.newsweek.com/russia-implores-ukraine-stop-senseless-resistance-severodonetsk-1716332,,False,True,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,veekau,True,,alvwg,,0,True,all_ads,False,False,,/r/worldnews/comments/veekau/russia_implores_ukraine_to_stop_senseless/,all_ads,False,https://www.newsweek.com/russia-implores-ukraine-stop-senseless-resistance-severodonetsk-1716332,29034338,2022-06-17 14:32:32,0,,False
,worldnews,,t2_geqiyunp,False,,0,False,EU Should Press Ethiopia for Tangible Rights Progress,r/worldnews,False,6,,0,,,True,t3_veew3f,False,dark,1.0,,1,0,,,False,,False,False,,,False,1,,False,False,,False,,,,False,public,2022-06-17 14:49:36,text,6,,,text,hrw.org,False,,,,,https://www.hrw.org/news/2022/06/17/eu-should-press-ethiopia-tangible-rights-progress,,False,True,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,veew3f,True,,yodi_yodi,,0,False,all_ads,False,False,,/r/worldnews/comments/veew3f/eu_should_press_ethiopia_for_tangible_rights/,all_ads,False,https://www.hrw.org/news/2022/06/17/eu-should-press-ethiopia-tangible-rights-progress,29034411,2022-06-17 14:49:36,0,,False
,worldnews,,t2_3kyuz,False,,0,False,Nepal prepares to move Everest base camp further down the mountain after part of the glacier where encampment is based starts to MELT,r/worldnews,False,6,,0,,,True,t3_vef09q,False,dark,1.0,,2,0,,,False,,False,False,,,False,2,,False,False,,False,,,,False,public,2022-06-17 14:53:52,text,6,,,text,dailymail.co.uk,False,,,,,https://www.dailymail.co.uk/sciencetech/article-10926713/Nepal-plans-Everest-base-camp-global-warming.html,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2qh13,False,,,,,vef09q,True,,Smilefriend,,1,False,all_ads,False,False,,/r/worldnews/comments/vef09q/nepal_prepares_to_move_everest_base_camp_further/,all_ads,False,https://www.dailymail.co.uk/sciencetech/article-10926713/Nepal-plans-Everest-base-camp-global-warming.html,29034435,2022-06-17 14:53:52,0,,False


## ProgrammerHumor

In [8]:
%%sparksql

-- Number of r/ProgrammerHumor posts per calendar day, earliest day first.
SELECT
    CAST(created_utc AS DATE),
    COUNT(*)
FROM reddit_ProgrammerHumor
GROUP BY CAST(created_utc AS DATE)
ORDER BY CAST(created_utc AS DATE)

                                                                                

0,1
created_utc,count(1)
2022-06-17,5


In [9]:
%%sparksql

-- Every column of the r/ProgrammerHumor view, earliest created_utc first.
SELECT *
FROM reddit_ProgrammerHumor
ORDER BY created_utc

                                                                                

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98
approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,top_awarded_type,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,ups,total_awards_received,thumbnail_width,author_flair_template_id,is_original_content,secure_media,is_reddit_media_domain,is_meta,category,link_flair_text,can_mod_post,score,approved_by,is_created_from_ads_ui,author_premium,thumbnail,edited,author_flair_css_class,post_hint,content_categories,is_self,subreddit_type,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,url_overridden_by_dest,view_count,archived,no_follow,is_crosspostable,pinned,over_18,media_only,link_flair_template_id,can_gild,spoiler,locked,author_flair_text,visited,removed_by,mod_note,distinguished,subreddit_id,author_is_blocked,mod_reason_by,num_reports,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
,ProgrammerHumor,,t2_54dvd6d4,False,,0,False,"Never used python, ask me a question and I’ll answer like I did.",r/ProgrammerHumor,False,6,,0,140,,True,t3_ved7pz,False,dark,0.800000011920929,,3,0,140,,False,,True,False,,,False,3,,False,False,https://a.thumbs.redditmedia.com/J5kXwF3yIEX2tqDTiAZTmQr38_lKlI8I85PPooTfu20.jpg,False,,image,,False,public,2022-06-17 13:30:40,text,6,,,text,i.redd.it,False,,,,,https://i.redd.it/i39ca2ufo6691.jpg,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2tex6,False,,,,,ved7pz,True,,TheBluePhenoix,,5,False,all_ads,False,False,,/r/ProgrammerHumor/comments/ved7pz/never_used_python_ask_me_a_question_and_ill/,all_ads,False,https://i.redd.it/i39ca2ufo6691.jpg,2056247,2022-06-17 13:30:40,0,,False
,ProgrammerHumor,,t2_75mqu1d5,False,,0,False,The pain,r/ProgrammerHumor,False,6,meme,0,140,,True,t3_vee1sy,False,dark,1.0,,1,0,140,,False,,True,False,,Meme,False,1,,False,False,https://b.thumbs.redditmedia.com/vDJEMxOnIkDY7iJdkJ4v8rxt17S-3NGxeN4qO02kClw.jpg,False,,image,,False,public,2022-06-17 14:09:04,richtext,6,,,text,i.redd.it,False,,,,,https://i.redd.it/qrbzub8fv6691.jpg,,False,True,True,False,False,False,fe277d42-440f-11e8-8ec3-0eac434afc94,True,False,False,,False,,,,t5_2tex6,False,,,,,vee1sy,True,,mcalby,,0,True,all_ads,False,False,,/r/ProgrammerHumor/comments/vee1sy/the_pain/,all_ads,False,https://i.redd.it/qrbzub8fv6691.jpg,2056264,2022-06-17 14:09:04,0,,False
,ProgrammerHumor,,t2_5wtmwnwk,False,,0,False,Running as an admin can make it run faster...,r/ProgrammerHumor,False,6,meme,0,119,,True,t3_veeho2,False,dark,1.0,,5,0,140,,False,,True,False,,Meme,False,5,,False,False,https://b.thumbs.redditmedia.com/LKEWm8rZ0uyom4SOTWKWSELv9RyqYqqvK7hwp7bBMSo.jpg,False,,image,,False,public,2022-06-17 14:30:24,richtext,6,,,text,i.redd.it,False,,,,,https://i.redd.it/2bkimh0ow6691.png,,False,False,True,False,False,False,fe277d42-440f-11e8-8ec3-0eac434afc94,True,False,False,,False,,,,t5_2tex6,False,,,,,veeho2,True,,vanessabaxton,,1,True,all_ads,False,False,,/r/ProgrammerHumor/comments/veeho2/running_as_an_admin_can_make_it_run_faster/,all_ads,False,https://i.redd.it/2bkimh0ow6691.png,2056353,2022-06-17 14:30:24,0,,False
,ProgrammerHumor,,t2_me0endh,False,,0,False,This one feels so accurate than other pictures,r/ProgrammerHumor,False,6,,0,140,,True,t3_veeshq,False,dark,1.0,,4,0,140,,False,,True,False,,,False,4,,False,False,https://a.thumbs.redditmedia.com/p1tpbwEsYSvo_BJgKqu1fcvDCYAyJgSrp-G283F8Lz8.jpg,False,,image,,False,public,2022-06-17 14:45:20,text,6,,,text,i.redd.it,False,,,,,https://i.redd.it/pfkmn1aq17691.jpg,,False,False,True,False,False,False,,True,False,False,,False,,,,t5_2tex6,False,,,,,veeshq,True,,SubhumanOxford,,0,True,all_ads,False,False,,/r/ProgrammerHumor/comments/veeshq/this_one_feels_so_accurate_than_other_pictures/,all_ads,False,https://i.redd.it/pfkmn1aq17691.jpg,2056386,2022-06-17 14:45:20,0,,False
,ProgrammerHumor,,t2_72z1d6gn,False,,0,False,We can't take it easy! ( Watch full gif),r/ProgrammerHumor,False,6,meme,0,140,,True,t3_vef1l2,False,dark,1.0,transparent,2,0,140,0d974c6c-008a-11e9-984a-0ebd37c29be8,False,,True,False,,Meme,False,2,,False,False,https://a.thumbs.redditmedia.com/NWo-XPTWZrX_ggKIKdNKDL37L8KUK4GSbnKB-UQVDU4.jpg,False,,image,,False,public,2022-06-17 14:56:00,richtext,6,,,richtext,i.redd.it,False,,,,,https://i.redd.it/ilk0ie5v37691.gif,,False,False,True,False,False,False,fe277d42-440f-11e8-8ec3-0eac434afc94,True,False,False,:c:,False,,,,t5_2tex6,False,,,,,vef1l2,True,,yuva-krishna-memes,,0,False,all_ads,False,False,dark,/r/ProgrammerHumor/comments/vef1l2/we_cant_take_it_easy_watch_full_gif/,all_ads,False,https://i.redd.it/ilk0ie5v37691.gif,2056427,2022-06-17 14:56:00,0,,False


22/06/17 18:43:41 ERROR TaskSchedulerImpl: Lost executor 2 on 172.24.0.7: worker lost
22/06/17 18:43:41 ERROR TaskSchedulerImpl: Lost executor 1 on 172.24.0.7: worker lost
22/06/17 18:43:41 ERROR TaskSchedulerImpl: Lost executor 3 on 172.24.0.7: worker lost
22/06/17 18:43:41 ERROR TaskSchedulerImpl: Lost executor 0 on 172.24.0.7: worker lost
22/06/17 18:43:42 ERROR TaskSchedulerImpl: Lost executor 5 on 172.24.0.6: worker lost
22/06/17 18:43:42 ERROR TaskSchedulerImpl: Lost executor 7 on 172.24.0.6: worker lost
22/06/17 18:43:42 ERROR TaskSchedulerImpl: Lost executor 4 on 172.24.0.6: worker lost
22/06/17 18:43:42 ERROR TaskSchedulerImpl: Lost executor 6 on 172.24.0.6: worker lost
22/06/17 18:43:43 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_49_26 !
22/06/17 18:43:43 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_66_37 !
22/06/17 18:43:43 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_15_38 !
22/06/17 18:43:43 WARN BlockMa

### stop spark

In [8]:
# Shut the session down once the queries above have run, so a full
# "Run All" releases the executors this notebook requested instead of
# leaving the application attached to the kernel.
spark.stop()