In [1]:
# For data handling and Plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
import time
from datetime import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

# For reading websites
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests

# For saving the data
from csv import writer

In [2]:
# Download the tutorials index page and parse it.
website = 'https://www.tutorialspoint.com/computer_programming_tutorials.htm'
# A timeout keeps the cell from hanging forever on a stalled connection.
request = requests.get(website, timeout=30)
# Fail here on an HTTP error instead of silently parsing an error page.
request.raise_for_status()
soup = BeautifulSoup(request.content, 'html.parser')

# Every <div class="mui-col-md-12"> on the page; a later cell indexes into this list.
container = soup.find_all('div', class_ = 'mui-col-md-12')

In [3]:
# Collect every <a> tag inside the second matched container div (index 1).
a_links = container[1].find_all('a')

In [4]:
# Number of anchor tags collected from the container.
len(a_links)

74

In [5]:
# Parallel lists: all_course_headings[k] is the title of the course at all_course_links[k].
all_course_links = []
all_course_headings = []

for anchor in a_links:
    title = anchor.get('title')
    link = anchor.get('href')
    # Tag.get() returns None for a missing attribute. Skip anchors without a
    # title (would raise TypeError on the `in` test) or without an href (would
    # store None and break URL building later), so both lists stay aligned.
    if title and link and 'Learn' in title:
        all_course_headings.append(title)
        all_course_links.append(link)
    

In [6]:
# Display the site-relative course URLs gathered by the filtering loop.
all_course_links

['/unix_sockets/index.htm',
 '/apex/index.htm',
 '/arduino/index.htm',
 '/assembly_programming/index.htm',
 '/awk/index.htm',
 '/clojure/index.htm',
 '/cobol/index.htm',
 '/cprogramming/index.htm',
 '/cplusplus/index.htm',
 '/csharp/index.htm',
 '/dart_programming/index.htm',
 '/d_programming/index.htm',
 '/elixir/index.htm',
 '/elm/index.htm',
 '/erlang/index.htm',
 '/euphoria/index.htm',
 '/fortran/index.htm',
 '/fsharp/index.htm',
 '/go/index.htm',
 '/groovy/index.htm',
 '/haskell/index.htm',
 '/java/index.htm',
 '/java8/index.htm',
 '/java9/index.htm',
 '/java10/index.htm',
 '/java_mysql/index.htm',
 '/java_beanutils/index.htm',
 '/julia/index.htm',
 '/jcl/index.htm',
 '/kdbplus/index.htm',
 '/lisp/index.htm',
 '/lolcode/index.htm',
 '/logo/index.htm',
 '/lua/index.htm',
 '/matlab/index.htm',
 '/matlab_m_files/index.htm',
 '/matlab_matrix/index.htm',
 '/matlab_simulink/index.htm',
 '/nodejs/index.htm',
 '/objective_c/index.htm',
 '/oauth2.0/index.htm',
 '/pascal/index.htm',
 '/parr

In [7]:
# Display the course titles gathered by the filtering loop.
all_course_headings

['Learn Unix Sockets',
 'Learn Apex Programming',
 'Learn Arduino Programming',
 'Learn Assembly Programming',
 'Learn Awk Programming',
 'Learn Clojure',
 'Learn COBOL',
 'Learn C Programming',
 'Learn C++',
 'Learn C#',
 'Learn Dart Programming',
 'Learn D Programming',
 'Learn Elixir',
 'Learn Elm',
 'Learn Erlang',
 'Learn Euphoria',
 'Learn Fortran',
 'Learn F#',
 'Learn Go Programing',
 'Learn Groovy Programing',
 'Learn Haskell',
 'Learn Java',
 'Learn Java-8',
 'Learn Java-9',
 'Learn Java-10',
 'Learn Java MySQL',
 'Learn Java Bean Utils',
 'Learn Julia',
 'Learn JCL',
 'Learn KDB+',
 'Learn LISP',
 'Learn LOLCODE',
 'Learn LOGO',
 'Learn Lua',
 'Learn MATLAB',
 'Learn MATLAB M Files',
 'Learn MATLAB Matrix',
 'Learn MATLAB Simulink',
 'Learn Node.js',
 'Learn Objective C Programming',
 'Learn OAuth2.0',
 'Learn Pascal Programming',
 'Learn Parrot',
 'Learn CGI with PERL',
 'Learn PHP',
 'Learn PHP-7',
 'Learn Python',
 'Learn Python-3',
 'Learn Prolog',
 'Learn Ruby',
 'Learn

In [8]:
# Crawl every course: fetch its index page, follow each chapter link from the
# sidebar, flatten the chapter content into strings and write one CSV per course.
#
# Separators used when flattening page content into a single CSV cell:
#   '@@@' between items of one group (headings of one level, <li> of one list, cells of one row)
#   '|||' between groups (heading levels, paragraphs, lists, table rows, code examples)
#   '&&&' between tables of the same page
base_url = 'https://www.tutorialspoint.com'
cs_links = []  # not populated or read anywhere in this cell
REQUEST_TIMEOUT = 30  # seconds to wait on a single HTTP request before giving up
headings_available = ['h1', 'h2', 'h3', 'h4']


def _fetch_soup(url):
    """Download ``url`` and return it parsed with BeautifulSoup, or None if the request fails.

    Only transport-level failures (timeout, connection error, ...) return None.
    HTTP error statuses are not checked; the response body is parsed as-is and the
    caller decides what to do when the expected elements are missing.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print('Request failed for {}: {}'.format(url, err))
        return None
    return BeautifulSoup(response.content, 'html.parser')


def _join_list_items(list_tags):
    """Flatten <ul>/<ol> tags: '@@@' between the <li> texts of a list, '|||' between lists.

    Lists without any <li> are ignored, so an empty first list cannot produce a
    stray leading '|||'.
    """
    groups = []
    for list_tag in list_tags:
        items = list_tag.find_all('li')
        if items:
            groups.append('@@@'.join(li.text for li in items))
    return '|||'.join(groups)


def _table_to_text(table):
    """Flatten one <table>: <th> texts joined by '@@@', followed by its rows joined by '|||'."""
    header_text = '@@@'.join(th.text for th in table.find_all('th'))
    # A row that holds only <th> cells has no <td>, so it contributes an empty
    # string; that empty entry is what puts the '|||' between the header text
    # and the first data row.
    row_texts = ['@@@'.join(td.text for td in row.find_all('td'))
                 for row in table.find_all('tr')]
    return header_text + '|||'.join(row_texts)


for extra_index in range(len(all_course_links)):
    # Per-course accumulators; index k of each list describes the k-th scraped chapter.
    all_headings = []
    all_paragraphs = []
    all_chapter_headings = []
    all_ul_list_items = []
    all_ol_list_items = []
    all_tables = []
    all_examples = []

    per_course_url = base_url + all_course_links[extra_index]
    per_course_soup = _fetch_soup(per_course_url)
    if per_course_soup is None:
        # The course index could not be downloaded: skip it (no CSV is written).
        continue

    # The sidebar table of contents holds the links to the chapters of this course.
    side_links = per_course_soup.find_all('ul', class_ = 'toc chapters')
    chapter_links = []
    for chapter_link in side_links:
        for a in chapter_link.find_all('a'):
            href = a.get('href')
            if href:  # anchors without an href cannot be followed
                chapter_links.append(href)

    untraced_pages = 0  # chapters skipped because they failed to load or lacked the content div
    for chapter_link in chapter_links:
        per_chapter_url = base_url + chapter_link
        per_chapter_soup = _fetch_soup(per_chapter_url)
        if per_chapter_soup is None:
            untraced_pages += 1
            continue

        chapter_data = per_chapter_soup.find_all('div', class_ = 'mui-col-md-6 tutorial-content')
        if len(chapter_data) == 0:
            untraced_pages += 1
            continue
        content = chapter_data[0]

        # Getting the Chapter Heading (the page <title>; empty string if the page has none)
        title_tag = per_chapter_soup.title
        all_chapter_headings.append(title_tag.text if title_tag else '')

        # Getting all Headings: one group per heading level that occurs in the content
        heading_groups = []
        for head in headings_available:
            found = content.find_all(head)
            if found:
                heading_groups.append('@@@'.join(tag.text for tag in found))
        all_headings.append('|||'.join(heading_groups))

        # Getting all paragraphs (only <p> tags that carry no class attribute)
        all_paragraphs.append('|||'.join(para.text for para in content.find_all('p', class_ = None)))

        # Getting all Unordered List items (None when the content has no such list)
        ul_data = content.find_all('ul', class_ = 'list')
        all_ul_list_items.append(_join_list_items(ul_data) if ul_data else None)

        # Getting all Ordered List Items (None when the content has no such list)
        ol_data = content.find_all('ol', class_ = 'list')
        all_ol_list_items.append(_join_list_items(ol_data) if ol_data else None)

        # Getting tables data; searched on the whole page, not only inside the content div
        table_data = per_chapter_soup.find_all('table', class_ = 'table table-bordered')
        if table_data:
            table_str = '&&&'.join(_table_to_text(table) for table in table_data)
        else:
            table_str = None  # same "no data" marker as the other optional columns
        all_tables.append(table_str)

        # Getting code examples given in each page; newlines inside an example are removed
        example_code = per_chapter_soup.find_all('pre', class_ = 'prettyprint notranslate')
        if example_code:
            example = '|||'.join(ex.text.replace('\n', '') for ex in example_code)
        else:
            example = None
        all_examples.append(example)

    # One course-name entry per chapter that was actually scraped.
    this_course_heading = [all_course_headings[extra_index]] * len(all_chapter_headings)

    # Writing the file ('/' in a course name would be read as a path separator)
    with open(all_course_headings[extra_index].replace('/', '_') + '.csv', 'w', encoding='utf8', newline='') as f:
        the_writer = writer(f)
        header = ['Course', 'Chapters', 'ALL Headings', 'Paragraphs',
                  'Unordered List Items', 'Ordered List Items', 'Tables', 'Code Examples']
        the_writer.writerow(header)
        # csv.writer writes None as an empty field.
        the_writer.writerows(zip(this_course_heading, all_chapter_headings, all_headings, all_paragraphs,
                                 all_ul_list_items, all_ol_list_items, all_tables, all_examples))
    

In [9]:
# Load the scraped "Learn Unix Sockets" course back from disk for inspection.
file = (
    pd.read_csv('Learn Unix Sockets.csv')
    .replace(np.nan, '', regex=True)  # swap NaN cells for empty strings
)
dataset = pd.DataFrame(file)
dataset

Unnamed: 0,Course,Chapters,ALL Headings,Paragraphs,Unordered List Items,Ordered List Items,Tables,Code Examples
0,Learn Unix Sockets,Unix Socket Tutorial,Unix Socket Tutorial@@@Audience@@@Prerequisite...,Sockets are communication points on the same o...,,,,
1,Learn Unix Sockets,What is a Socket?,What is a Socket?|||Where is Socket Used?@@@So...,Sockets allow communication between two differ...,Stream Sockets − Delivery in a networked envir...,,,
2,Learn Unix Sockets,Unix Socket - Network Addresses,Unix Socket - Network Addresses|||Address Clas...,"Before we proceed with the actual stuff, let u...","Class A addresses begin with 0xxx, or 1 to 126...",,Class@@@Leftmost bits@@@Start address@@@Finish...,
3,Learn Unix Sockets,Unix Socket - Network Host Names,Unix Socket - Network Host Names|||The /etc/ho...,Host names in terms of numbers are difficult t...,,,,
4,Learn Unix Sockets,Unix Socket - Client Server Model,Unix Socket - Client Server Model|||Client Pro...,Most of the Net Applications use the Client-Se...,"2-tier architecture − In this architecture, th...",,,
5,Learn Unix Sockets,Unix Socket - Structures,Unix Socket - Structures|||sockaddr@@@sockaddr...,Various structures are used in Unix Socket Pro...,,,Attribute@@@Values@@@Description|||sa_family@@...,struct sockaddr { unsigned short sa_family...
6,Learn Unix Sockets,Unix Socket - Ports and Services,Unix Socket - Ports and Services|||Example Por...,When a client process wants to a connect a ser...,"struct servent *getservbyname(char *name, char...",,Service@@@Port Number@@@Service Description|||...,struct servent { char *s_name; char **s_...
7,Learn Unix Sockets,Unix Socket - Network Byte Orders,Unix Socket - Network Byte Orders|||Byte Order...,"Unfortunately, not all computers store the byt...","Little Endian − In this scheme, low-order byte...",,Function@@@Description|||htons()@@@Host to Net...,
8,Learn Unix Sockets,Unix Socket - IP Address Functions,Unix Socket - IP Address Functions|||int inet_...,Unix provides various function calls to help y...,"int inet_aton(const char *strptr, struct in_ad...",,,#include <arpa/inet.h>(...) int retval; st...
9,Learn Unix Sockets,Unix Socket - Core Functions,Unix Socket - Core Functions|||The socket Func...,This chapter describes the core socket functio...,sockfd − It is a socket descriptor returned by...,,Family@@@Description|||AF_INET@@@IPv4 protocol...,#include <sys/types.h>#include <sys/socket.h>i...


In [10]:
# Load the scraped "Learn C++" course back from disk for inspection.
file = (
    pd.read_csv('Learn C++.csv')
    .replace(np.nan, '', regex=True)  # swap NaN cells for empty strings
)
dataset = pd.DataFrame(file)
dataset

Unnamed: 0,Course,Chapters,ALL Headings,Paragraphs,Unordered List Items,Ordered List Items,Tables,Code Examples
0,Learn C++,C++ Tutorial,C++ Tutorial|||Why to Learn C++@@@Hello World ...,C++ is a middle-level programming language dev...,"C++ is very close to hardware, so you get a ch...",,,#include <iostream>using namespace std;// main...
1,Learn C++,C++ Overview,C++ Overview|||Object-Oriented Programming@@@S...,"C++ is a statically typed, compiled, general-p...",Encapsulation@@@Data hiding@@@Inheritance@@@Po...,,,
2,Learn C++,C++ Environment Setup,C++ Environment Setup|||Local Environment Setu...,If you are still willing to set up your enviro...,,,,
3,Learn C++,C++ Basic Syntax,C++ Basic Syntax|||C++ Program Structure@@@Com...,"When we consider a C++ program, it can be defi...",Object − Objects have states and behaviors. Ex...,,asm@@@else@@@new@@@this|||auto@@@enum@@@operat...,#include <iostream>using namespace std;// main...
4,Learn C++,Comments in C++,Comments in C++|||Useful Video Courses|||C++ O...,Program comments are explanatory statements th...,,,,#include <iostream>using namespace std;main() ...
5,Learn C++,C++ Data Types,C++ Data Types|||Primitive Built-in Types@@@ty...,"While writing program in any language, you nee...",signed@@@unsigned@@@short@@@long,,Type@@@Keyword|||Boolean@@@bool|||Character@@@...,#include <iostream>using namespace std;int mai...
6,Learn C++,C++ Variable Types,C++ Variable Types|||Variable Definition in C+...,A variable provides us with named storage that...,lvalue − Expressions that refer to a memory lo...,,Sr.No@@@Type & Description|||1@@@bool\nStores ...,#include <iostream>using namespace std;// Vari...
7,Learn C++,Variable Scope in C++,Variable Scope in C++|||Local Variables@@@Glob...,A scope is a region of the program and broadly...,Inside a function or a block which is called l...,,Data Type@@@Initializer|||int@@@0|||char@@@'\0...,#include <iostream>using namespace std; int ma...
8,Learn C++,C++ Constants/Literals,C++ Constants/Literals|||Integer Literals@@@Fl...,Constants refer to fixed values that the progr...,A value of true representing true.@@@A value o...,,Escape sequence@@@Meaning|||\\@@@\ character||...,#include <iostream>using namespace std;int mai...
9,Learn C++,C++ Modifier Types,C++ Modifier Types|||Type Qualifiers in C++@@@...,"C++ allows the char, int, and double data typ...",signed@@@unsigned@@@long@@@short,,Sr.No@@@Qualifier & Meaning|||1@@@const\nObjec...,#include <iostream>using namespace std; /* Thi...


In [11]:
# Load the scraped "Learn Python-3" course back from disk for inspection.
file = (
    pd.read_csv('Learn Python-3.csv')
    .replace(np.nan, '', regex=True)  # swap NaN cells for empty strings
)
dataset = pd.DataFrame(file)
dataset

Unnamed: 0,Course,Chapters,ALL Headings,Paragraphs,Unordered List Items,Ordered List Items,Tables,Code Examples
0,Learn Python-3,Python 3 Tutorial,Python 3 Tutorial|||Why to Learn Python 3?@@@C...,"Python is a general-purpose interpreted, inter...",Python is Interpreted − Python is processed at...,,,"print ""Hello, Python!"""
1,Learn Python-3,What is New in Python 3,What is New in Python 3|||The __future__ modul...,Python 3.x introduced some Python 2-incompatib...,,,,"from __future__ import division|||print ""Hello..."
2,Learn Python-3,Python 3 - Overview,Python 3 - Overview|||History of Python@@@Pyth...,"Python is a high-level, interpreted, interacti...",Python is Interpreted − Python is processed at...,,,
3,Learn Python-3,Python 3 - Environment Setup,Python 3 - Environment Setup|||Local Environme...,"Python 3 is available for Windows, Mac OS and ...",Windows x86-64 embeddable zip file@@@Windows x...,,Sr.No.@@@Variable & Description|||1@@@PYTHONPA...,$sudo apt-get install python3-minimal|||Extrac...
4,Learn Python-3,Python 3 - Basic Syntax,Python 3 - Basic Syntax|||First Python Program...,The Python language has many similarities to P...,Class names start with an uppercase letter. Al...,,and@@@exec@@@not|||as@@@finally@@@or|||assert@...,"\r$ python\r\rPython 3.3.2 (default, Dec 10 20..."
5,Learn Python-3,Python 3 - Variable Types,Python 3 - Variable Types|||Assigning Values t...,Variables are nothing but reserved memory loca...,Numbers@@@String@@@List@@@Tuple@@@Dictionary||...,,int@@@float@@@complex|||10@@@0.0@@@3.14j|||100...,#!/usr/bin/python3counter = 100 # An ...
6,Learn Python-3,Python 3 - Basic Operators,Python 3 - Basic Operators|||Types of Operator...,"Operators are the constructs, which can manipu...",Arithmetic Operators@@@Comparison (Relational)...,,Operator@@@Description@@@Example|||+ Addition@...,
7,Learn Python-3,Python 3 - Decision Making,Python 3 - Decision Making|||Single Statement ...,Decision-making is the anticipation of conditi...,,,Sr.No.@@@Statement & Description|||1@@@if stat...,#!/usr/bin/python3var = 100if ( var == 100 ) ...
8,Learn Python-3,Python 3 - Loops,Python 3 - Loops|||Loop Control Statements@@@I...,"In general, statements are executed sequential...",,,Sr.No.@@@Loop Type & Description|||1@@@while l...,"\r#!/usr/bin/python3\rimport sys\r\rlist = [1,..."
9,Learn Python-3,Python 3 - Numbers,Python 3 - Numbers|||Number Type Conversion@@@...,Number data types store numeric values. They a...,int (signed integers) − They are often called ...,,int@@@float@@@complex|||10@@@0.0@@@3.14j|||100...,"var1 = 1var2 = 10|||del var1[,var2[,var3[....,..."
