In [21]:
import re
def extract_year_and_last_number(input_string):
    """Pull the year and trailing serial number out of a gazette ID.

    The ID is expected to end in ``<4 digits>-<digits>``; the four digits
    immediately before the last hyphen are taken as the year, e.g.
    ``"CG-DL-E-22032025-261873"`` -> ``(2025, 261873)``.

    Args:
        input_string: Gazette ID text. Non-string values (``None``, or the
            float NaN pandas uses for empty cells) are treated as "no match".

    Returns:
        ``(year, last_number)`` as ints, or ``(None, None)`` when the input is
        not a string or does not end in the expected pattern.
    """
    # re.search raises TypeError on non-strings, so reject them up front.
    if not isinstance(input_string, str):
        return None, None
    match = re.search(r'(\d{4})-(\d+)$', input_string)
    if match is None:
        return None, None
    return int(match.group(1)), int(match.group(2))

def make_pdf_url(gazzette_id):
    """Build the eGazette PDF download URL for a gazette ID.

    Args:
        gazzette_id: Gazette ID string such as ``"CG-DL-E-22032025-261873"``.
            Anything that is not a string (``None``, float NaN from an empty
            CSV cell, plain numbers) yields ``None``.

    Returns:
        ``https://egazette.gov.in/WriteReadData/<year>/<number>.pdf``, or
        ``None`` when no year/number pair can be extracted from the ID.
    """
    # isinstance (instead of ``type(x) == float``) also rejects None and other
    # non-string values that cannot be searched with a regex.
    if not isinstance(gazzette_id, str):
        return None
    year, gazzete_num = extract_year_and_last_number(gazzette_id)
    if year and gazzete_num:
        return f'https://egazette.gov.in/WriteReadData/{year}/{gazzete_num}.pdf'
    return None

In [22]:
# Sanity check: a sample gazette ID should split into (year, serial number).
extract_year_and_last_number("CG-DL-E-22032025-261873")

(2025, 261873)

In [23]:
# Input CSV and the local directory that download_pdf() saves PDFs into.
egazzete_csv_file = "egazette_data.csv"
pdf_folder = "pdfs"

import pandas as pd

df = pd.read_csv(egazzete_csv_file)

# Derive one download URL per row; make_pdf_url returns None when it cannot build one.
df['pdf_url'] = df['Gazette ID'].apply(make_pdf_url)

In [25]:
# Show only the rows for which a PDF URL could be built.
df[df['pdf_url'].notna()]

Unnamed: 0,S. No.,Ministry / OrganizationDepartmentOffice,Gazette CategoryPart & SectionSubject,Publish Date,Gazette ID,ViewGazette,pdf_url
0,1.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,"Extra OrdinaryPart II-Section 3-Sub-Section (ii)Issue of Notification for Constitution of the Institute Body of National Institute of Mental Health and Neurosciences, Bangalore",22-Mar-2025,CG-DL-E-22032025-261873,0.86 MB,https://egazette.gov.in/WriteReadData/2025/261873.pdf
1,2.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Notification regarding ABEAS System in Pharmacy reg,22-Mar-2025,CG-DL-E-22032025-261874,0.77 MB,https://egazette.gov.in/WriteReadData/2025/261874.pdf
2,3.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Banning of Chloramphenicol and Nitrofurans in food rearing animal systems,13-Mar-2025,CG-DL-E-13032025-261585,1.74 MB,https://egazette.gov.in/WriteReadData/2025/261585.pdf
3,4.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Reconstitution of the Drugs Technical Advisory Board,13-Mar-2025,CG-DL-E-13032025-261584,1.85 MB,https://egazette.gov.in/WriteReadData/2025/261584.pdf
4,5.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Notification of Members of NCAHP,03-Mar-2025,CG-DL-E-03032025-261421,1.88 MB,https://egazette.gov.in/WriteReadData/2025/261421.pdf
...,...,...,...,...,...,...,...
319,320.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)In partial modification of Hindi version of notification S O 383 E dated 27 01 2020 published in Part II,06-Feb-2020,CG-DL-E-06022020-215943,0.80 MB,https://egazette.gov.in/WriteReadData/2020/215943.pdf
320,321.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)In exercise of the powers conferred by sub section 2 of section 20 of the Drugs and Cosmetics Act,31-Jan-2020,CG-DL-E-31012020-215843,0.91 MB,https://egazette.gov.in/WriteReadData/2020/215843.pdf
321,322.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)In exercise of powers conferred by clause a b c d e f and g of Section 5 of the Post Graduate Institute of Medical Education and Research Chandigarh Act,29-Jan-2020,CG-DL-E-29012020-215770,0.81 MB,https://egazette.gov.in/WriteReadData/2020/215770.pdf
322,323.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,"Extra OrdinaryPart II-Section 3-Sub-Section (i)Whereas the Central Government, on being satisfied that the use of the drug Oxytocin",27-Jan-2020,CG-DL-E-27012020-215741,0.73 MB,https://egazette.gov.in/WriteReadData/2020/215741.pdf


In [46]:
import requests
import os
import PyPDF2

def download_pdf(pdf_url: str):
    """Download a PDF into ``pdf_folder`` and return the local file path.

    The file name is the last path segment of the URL. A file that already
    exists locally is reused instead of being downloaded again.

    Args:
        pdf_url: URL of the PDF. ``None``, NaN or any other non-string value,
            and the empty string are skipped.

    Returns:
        Path of the saved (or previously downloaded) file, or ``None`` when
        the URL is unusable or the download fails.
    """
    if not isinstance(pdf_url, str) or len(pdf_url) == 0:
        return None
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(pdf_folder, exist_ok=True)
    pdf_file = os.path.join(pdf_folder, pdf_url.split('/')[-1])
    if os.path.exists(pdf_file):
        return pdf_file
    try:
        # Without a timeout a stalled connection would hang the whole apply().
        response = requests.get(pdf_url, timeout=60)
        # Reject 4xx/5xx so an HTML error page is never saved (and then
        # reused on later runs) as if it were the PDF.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to download {pdf_url}: {exc}")
        return None
    with open(pdf_file, 'wb') as f:
        f.write(response.content)
    return pdf_file

def count_pages_in_pdf(pdf_file):
    """Return the number of pages in a local PDF.

    Args:
        pdf_file: Path to the PDF on disk. ``None`` / NaN (rows without a
            downloaded file) and paths that do not exist are skipped.

    Returns:
        Page count as an int, or ``None`` when there is no file to read.
    """
    # NaN is a float; os.path.exists() would raise TypeError on it.
    if pdf_file is None or isinstance(pdf_file, float):
        return None
    if not os.path.exists(pdf_file):
        return None
    # The context manager closes the handle once the pages have been counted.
    with open(pdf_file, 'rb') as pdf_file_obj:
        return len(PyPDF2.PdfReader(pdf_file_obj).pages)

In [33]:
# Download every PDF (network I/O); rows without a URL get None.
df['pdf_file'] = df['pdf_url'].apply(download_pdf)

In [47]:
# Page count per downloaded PDF; None where there is no local file.
df['pages'] = df['pdf_file'].apply(count_pages_in_pdf)

In [48]:
# Check non-null counts and dtypes now that pdf_url, pdf_file and pages exist.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 9 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   S. No.                                   620 non-null    float64
 1   Ministry / OrganizationDepartmentOffice  620 non-null    object 
 2   Gazette CategoryPart & SectionSubject    620 non-null    object 
 3   Publish Date                             620 non-null    object 
 4   Gazette ID                               324 non-null    object 
 5   ViewGazette                              620 non-null    object 
 6   pdf_url                                  324 non-null    object 
 7   pdf_file                                 324 non-null    object 
 8   pages                                    324 non-null    float64
dtypes: float64(2), object(7)
memory usage: 43.7+ KB


In [73]:
from google import genai
from google.genai import types
import base64

# Running token totals across generate() calls; generate() updates them via `global`.
# Re-running this cell resets both counters to zero.
total_prompt_count = 0
total_candidate_count = 0

def read_pdf_as_mime_text(pdf_file_path):
    """Return the contents of the file at ``pdf_file_path`` as Base64 text.

    The file is read in binary mode and the Base64 bytes are decoded to a
    ``str`` using UTF-8.
    """
    with open(pdf_file_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

_GENAI_CLIENT = None


def _get_genai_client():
    """Create the Vertex AI client on first use and reuse it on later calls."""
    global _GENAI_CLIENT
    if _GENAI_CLIENT is None:
        _GENAI_CLIENT = genai.Client(
            vertexai=True,
            project="vertex-ai-im",
            location="us-central1",
        )
    return _GENAI_CLIENT


def generate(pdf_file_path, num_pages=None, max_pages=5):
    """Ask Gemini to summarise a gazette PDF and list the drugs it mentions.

    Side effects: adds the call's token usage to the module-level
    ``total_prompt_count`` / ``total_candidate_count`` and prints the running
    totals.

    Args:
        pdf_file_path: Path to a local PDF. ``None`` or a float (NaN for rows
            without a file) is skipped.
        num_pages: Page count of the PDF. When given and greater than
            ``max_pages`` the document is skipped; ``None`` disables the check.
        max_pages: Largest page count that is still sent to the model.

    Returns:
        The text of the first candidate's first part, or ``None`` when the row
        is skipped or the response contains no candidate content.
    """
    global total_prompt_count, total_candidate_count
    if pdf_file_path is None or isinstance(pdf_file_path, float):
        return None

    if num_pages is not None and num_pages > max_pages:
        return None

    # Send the raw bytes directly; encoding to Base64 only to decode it again
    # would produce the same bytes.
    with open(pdf_file_path, "rb") as pdf_handle:
        pdf_bytes = pdf_handle.read()
    document1 = types.Part.from_bytes(
        data=pdf_bytes,
        mime_type="application/pdf",
    )

    text1 = types.Part.from_text(text="""summarize this document in and list out drugs mentioned in the doc and does this document talk about drugs regulation on manufactoring sale or importing or anything similar
    Give me output in json format containing keys summary, drugs_list""")

    model = "gemini-2.0-flash-001"
    contents = [
        types.Content(
            role="user",
            parts=[document1, text1],
        )
    ]

    safety_settings = [
        types.SafetySetting(category=category, threshold="OFF")
        for category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        response_modalities=["TEXT"],
        safety_settings=safety_settings,
    )

    response = _get_genai_client().models.generate_content(
        model=model,
        contents=contents,
        config=generate_content_config,
    )

    # Usage metadata and its counts may be missing; count those as zero
    # instead of failing on ``int + None``.
    usage = response.usage_metadata
    if usage is not None:
        total_prompt_count += usage.prompt_token_count or 0
        total_candidate_count += usage.candidates_token_count or 0
    print(total_prompt_count, total_candidate_count)

    # A response without candidates or parts has no text to return; give back
    # None for that row rather than raising in the middle of a DataFrame apply.
    candidates = response.candidates or []
    if not candidates:
        return None
    content = candidates[0].content
    if content is None or not content.parts:
        return None
    return content.parts[0].text

In [52]:
import os

# Point Google client libraries at the service-account key, unless the variable
# has already been set outside the notebook. "~" expands to the current user's
# home directory, so the path is not tied to one account name.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    os.path.expanduser('~/Documents/secrets/vertex-ai-im-6882a1563a11.json'),
)

In [86]:
import json
def extract_json_from_string(input_string):
    """Parse the JSON object contained in an LLM response.

    The object is normally wrapped in a Markdown ```json fence. When no such
    fence is found, the text from the first ``{`` to the last ``}`` is tried
    instead, so unfenced replies still parse.

    Args:
        input_string: Raw model response. Non-string values (``None``, NaN)
            yield ``None`` without any output.

    Returns:
        The decoded JSON value, or ``None`` when nothing parseable is found.
        In that case the offending text and the reason are printed once.
    """
    if not isinstance(input_string, str):
        return None

    match = re.search(r"```json\s*(\{.*?\})\s*```", input_string, re.DOTALL)
    if match:
        json_part = match.group(1)
    else:
        # Fallback for replies without a fence: take the outermost braces.
        start = input_string.find("{")
        end = input_string.rfind("}")
        json_part = input_string[start:end + 1] if 0 <= start < end else None

    if json_part is None:
        print(input_string)
        print("No valid JSON found in the input string.")
        return None

    try:
        return json.loads(json_part)
    except json.JSONDecodeError as e:
        # Truncated or otherwise malformed JSON: report it and skip the row.
        print(input_string)
        print(e)
        return None


In [68]:
# End-to-end check on one PDF: call the model, then parse its JSON reply.
extract_json_from_string(generate("pdfs/261585.pdf", 2))

cache_tokens_details=None cached_content_token_count=None candidates_token_count=179 candidates_tokens_details=[ModalityTokenCount(modality=<MediaModality.TEXT: 'TEXT'>, token_count=179)] prompt_token_count=561 prompt_tokens_details=[ModalityTokenCount(modality=<MediaModality.DOCUMENT: 'DOCUMENT'>, token_count=516), ModalityTokenCount(modality=<MediaModality.TEXT: 'TEXT'>, token_count=45)] thoughts_token_count=None tool_use_prompt_token_count=None tool_use_prompt_tokens_details=None total_token_count=740


{'summary': 'This document is a notification from the Ministry of Health and Family Welfare, Government of India, prohibiting the import, manufacture, sale, and distribution of drug formulations containing Chloramphenicol or Nitrofurans for use in any food-producing animal rearing system. The government has taken this decision because the use of these drugs in animal husbandry poses a risk, and safer alternatives are available. The decision is made under Section 26A of the Drugs and Cosmetics Act, 1940, after consulting with the Drugs Technical Advisory Board.',
 'drugs_list': ['Chloramphenicol', 'Nitrofurans'],
 'regulation_aspects': ['Import', 'Manufacture', 'Sale', 'Distribution']}

In [54]:
# Show the raw (unparsed) model reply for one PDF. generate() is defined with a
# page-count argument, so pass this document's page count (2).
generate("pdfs/261585.pdf", 2)

```json
{
  "summary": "This is an official notification from the Ministry of Health and Family Welfare, Government of India. It prohibits the import, manufacture, sale, and distribution of drug formulations containing Chloramphenicol or Nitrofurans for use in any food-producing animal rearing system. The decision is based on the risk associated with these drugs and the availability of safer alternatives.",
  "drugs_list": [
    "Chloramphenicol",
    "Nitrofurans"
  ],
  "regulatory_aspects": [
    "Prohibition of import",
    "Prohibition of manufacture",
    "Prohibition of sale",
    "Prohibition of distribution"
  ]
}
```


In [74]:
# One model call per row (network I/O); generate() returns None for rows without
# a PDF and for PDFs with more than 5 pages. Each call prints the running token totals.
df['llm_response'] = df.apply(lambda x: generate(x['pdf_file'], x['pages']), axis=1)

1077 211
1638 430
2199 607
3276 771
3837 1006
4398 1189
5217 1345
6294 1549
6855 1737
7416 1911
8493 2104
9054 2276
9615 2437
10950 2618
12027 2824
12588 3025
13149 3194
13710 3380
14529 3538
15090 3713
15651 3893
16212 4095
16773 4259
17334 4493
18669 4643
19746 4865
20565 5062
21126 5218
21687 5388
22248 5517
22809 5723
24144 5872
25221 6104
26040 6355
26859 6566
27420 6777
27981 6973
28542 7098
29361 7258
30180 7535
30741 7656
31302 7899
31863 8124
32682 8267
33243 8452
33804 8633
35139 8774
35700 8909
36261 9075
36822 9266
37383 9469
38718 9653
39279 9786
39840 9996
40401 10189
40962 10379
41523 10578
42084 10700
42645 10874
43206 11064
43767 11266
44328 11411
44889 11585
45450 11759
46785 11900
47346 12079
47907 12230
49242 12429
49803 12638
50364 12966
50925 13092
51486 13264
52047 13516
52608 13789
53169 13889
53730 14048
54291 14221
55626 14390
56187 14601
57006 14710
57567 14896
58128 15167
58689 15319
59250 15502
59811 15665
61146 15805
61707 15939
62268 16134
63087 16320
641

In [75]:
# Checkpoint the frame, including the raw LLM responses, to CSV.
df.to_csv("egazette_data_with_llm.csv", index=False)

In [80]:
import pandas as pd
# Reload the frame from the CSV checkpoint; this rebinds `df`.
df = pd.read_csv("egazette_data_with_llm.csv")

In [88]:
# Parse each raw reply into a Python object; unparseable replies are printed and become None.
df['llm_response_json'] = df['llm_response'].apply(extract_json_from_string)

```json
{
  "summary": "This document is a notification from the Ministry of Health and Family Welfare, Government of India, amending the National Medical Commission, Autonomous Boards (Manner of Appointment of Fourth Member and the Salary, Allowances and Terms and Conditions of Service, and Declaration of Assets, Professional and Commercial Engagements of President and Members) Rules, 2019. The amendment pertains to the contributory provident fund rules applicable to the President and whole-time members of the autonomous boards under the National Medical Commission. It is effective retroactively from September 25, 2020, to facilitate the deposit of Employee Provident Fund contributions for incumbents who joined the National Medical Commission and come under the Employee Provident Fund regulations.The notification ensures that these provisions do not adversely affect anyone.",
  "drugs_list": [],
   "drugs_regulation": "This document does not talk about drugs regulation, manufactoring,

In [89]:
# Compare non-null counts of llm_response and llm_response_json to see how many replies failed to parse.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 11 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   S. No.                                   620 non-null    float64
 1   Ministry / OrganizationDepartmentOffice  620 non-null    object 
 2   Gazette CategoryPart & SectionSubject    620 non-null    object 
 3   Publish Date                             620 non-null    object 
 4   Gazette ID                               324 non-null    object 
 5   ViewGazette                              620 non-null    object 
 6   pdf_url                                  324 non-null    object 
 7   pdf_file                                 324 non-null    object 
 8   pages                                    324 non-null    float64
 9   llm_response                             278 non-null    object 
 10  llm_response_json                        277 non-n

In [90]:
# The model does not always return the same keys, so use .get() rather than
# indexing; rows without a parsed JSON object stay None.
df['drugs_list'] = df['llm_response_json'].apply(lambda x: x.get('drugs_list') if isinstance(x, dict) else None)

In [93]:
# The model does not always return the same keys, so use .get() rather than
# indexing; rows without a parsed JSON object stay None.
df['summary'] = df['llm_response_json'].apply(lambda x: x.get('summary') if isinstance(x, dict) else None)

In [94]:
# Confirm the drugs_list and summary columns were added.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 13 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   S. No.                                   620 non-null    float64
 1   Ministry / OrganizationDepartmentOffice  620 non-null    object 
 2   Gazette CategoryPart & SectionSubject    620 non-null    object 
 3   Publish Date                             620 non-null    object 
 4   Gazette ID                               324 non-null    object 
 5   ViewGazette                              620 non-null    object 
 6   pdf_url                                  324 non-null    object 
 7   pdf_file                                 324 non-null    object 
 8   pages                                    324 non-null    float64
 9   llm_response                             278 non-null    object 
 10  llm_response_json                        277 non-n

In [95]:
# Overwrite the CSV checkpoint with the parsed columns included.
# NOTE(review): llm_response_json and drugs_list hold Python objects; presumably they are
# written as their string form and will come back as plain strings on reload — confirm.
df.to_csv("egazette_data_with_llm.csv", index=False)

In [92]:
# Preview the first rows, including the LLM columns.
df.head()

Unnamed: 0,S. No.,Ministry / OrganizationDepartmentOffice,Gazette CategoryPart & SectionSubject,Publish Date,Gazette ID,ViewGazette,pdf_url,pdf_file,pages,llm_response,llm_response_json,drugs_list
0,1.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,"Extra OrdinaryPart II-Section 3-Sub-Section (ii)Issue of Notification for Constitution of the Institute Body of National Institute of Mental Health and Neurosciences, Bangalore",22-Mar-2025,CG-DL-E-22032025-261873,0.86 MB,https://egazette.gov.in/WriteReadData/2025/261873.pdf,pdfs/261873.pdf,4.0,"```json\n{\n ""summary"": ""This document is a notification from the Ministry of Health and Family Welfare, Government of India. It concerns the constitution of the National Institute of Mental Health and Neuro-Sciences (NIMHANS), Bangalore, in accordance with the National Institute of Mental Health and Neuro-Sciences, Bangalore Act, 2012. The notification lists the members of the Institute, including ex-officio members (officials from various government departments and universities) and nominated members (experts in related fields). It specifies the positions held by these individuals and their affiliation to various institutions. The duration of the members' tenure and addressing vacancies will be governed by the provisions outlined in the NIMHANS Act, 2012."",\n ""drugs_list"": [],\n ""drugs_regulation"": ""This document does not specifically address drugs regulation, manufacturing, sale, or importing. It is solely focused on the composition of the governing body of NIMHANS, a mental health and neuroscience institute.""\n}\n```","{'summary': 'This document is a notification from the Ministry of Health and Family Welfare, Government of India. It concerns the constitution of the National Institute of Mental Health and Neuro-Sciences (NIMHANS), Bangalore, in accordance with the National Institute of Mental Health and Neuro-Sciences, Bangalore Act, 2012. The notification lists the members of the Institute, including ex-officio members (officials from various government departments and universities) and nominated members (experts in related fields). 
It specifies the positions held by these individuals and their affiliation to various institutions. The duration of the members' tenure and addressing vacancies will be governed by the provisions outlined in the NIMHANS Act, 2012.', 'drugs_list': [], 'drugs_regulation': 'This document does not specifically address drugs regulation, manufacturing, sale, or importing. It is solely focused on the composition of the governing body of NIMHANS, a mental health and neuroscience institute.'}",[]
1,2.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Notification regarding ABEAS System in Pharmacy reg,22-Mar-2025,CG-DL-E-22032025-261874,0.77 MB,https://egazette.gov.in/WriteReadData/2025/261874.pdf,pdfs/261874.pdf,2.0,"```json\n{\n ""summary"": ""This is a notification from the Ministry of Health and Family Welfare, Government of India, dated March 21, 2025. It amends a previous notification (S.O. 5041(E) dated November 23, 2023) related to the Aadhaar Authentication for Good Governance Rules, 2020, and the Aadhaar (Targeted Delivery of Financial and Other Subsidies, Benefits and Services) Act, 2016. The amendment allows the Pharmacy Council of India (PCI) to access online attendance data of officials, faculty, and students of PCI-approved pharmacy colleges through the centralized Aadhaar Enabled Biometric Attendance System. The notification also mandates adherence to the provisions of the Aadhaar Act, rules, regulations, guidelines, and directions related to the use of Aadhaar authentication as specified by the Unique Identification Authority of India or the Central Government."",\n ""drugs_list"": [],\n ""drugs_regulation_mentioned"": false\n}\n```","{'summary': 'This is a notification from the Ministry of Health and Family Welfare, Government of India, dated March 21, 2025. It amends a previous notification (S.O. 5041(E) dated November 23, 2023) related to the Aadhaar Authentication for Good Governance Rules, 2020, and the Aadhaar (Targeted Delivery of Financial and Other Subsidies, Benefits and Services) Act, 2016. The amendment allows the Pharmacy Council of India (PCI) to access online attendance data of officials, faculty, and students of PCI-approved pharmacy colleges through the centralized Aadhaar Enabled Biometric Attendance System. 
The notification also mandates adherence to the provisions of the Aadhaar Act, rules, regulations, guidelines, and directions related to the use of Aadhaar authentication as specified by the Unique Identification Authority of India or the Central Government.', 'drugs_list': [], 'drugs_regulation_mentioned': False}",[]
2,3.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Banning of Chloramphenicol and Nitrofurans in food rearing animal systems,13-Mar-2025,CG-DL-E-13032025-261585,1.74 MB,https://egazette.gov.in/WriteReadData/2025/261585.pdf,pdfs/261585.pdf,2.0,"```json\n{\n ""summary"": ""This is an official gazette notification from the Ministry of Health and Family Welfare, Government of India, dated March 12, 2025. It prohibits the import, manufacture, sale, and distribution of drug formulations containing Chloramphenicol or Nitrofurans for use in any food-producing animal rearing system. The decision is based on concerns about potential risks associated with these drugs and the availability of safer alternatives, and it is made in the public interest, following consultation with the Drugs Technical Advisory Board."",\n ""drugs_list"": [\n ""Chloramphenicol"",\n ""Nitrofurans""\n ],\n""drugs_regulation"": [\n ""prohibit import"",\n ""manufacture"",\n ""sale"",\n ""distribution""\n ]\n}\n```","{'summary': 'This is an official gazette notification from the Ministry of Health and Family Welfare, Government of India, dated March 12, 2025. It prohibits the import, manufacture, sale, and distribution of drug formulations containing Chloramphenicol or Nitrofurans for use in any food-producing animal rearing system. The decision is based on concerns about potential risks associated with these drugs and the availability of safer alternatives, and it is made in the public interest, following consultation with the Drugs Technical Advisory Board.', 'drugs_list': ['Chloramphenicol', 'Nitrofurans'], 'drugs_regulation': ['prohibit import', 'manufacture', 'sale', 'distribution']}","[Chloramphenicol, Nitrofurans]"
3,4.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareDrugs Regulation Section,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Reconstitution of the Drugs Technical Advisory Board,13-Mar-2025,CG-DL-E-13032025-261584,1.85 MB,https://egazette.gov.in/WriteReadData/2025/261584.pdf,pdfs/261584.pdf,4.0,"```json\n{\n ""summary"": ""This document is a gazette notification from the Ministry of Health and Family Welfare, Department of Health and Family Welfare, Government of India. It announces the reconstitution of the Drugs Technical Advisory Board, as per the Drugs and Cosmetics Act, 1940. The notification lists the members of the newly formed board along with their official position."",\n ""drugs_list"": [\n ""Not specifically mentioned in the summary, but the context implies that the document pertains to the regulation of Drugs and Cosmetics in general.""\n ],\n ""regulation_focus"": ""The gazette notification pertains to regulatory aspects of Drug and Cosmetics Act, 1940 but doesn't specify any of manufactoring sale or importing regulation. ""\n}\n```","{'summary': 'This document is a gazette notification from the Ministry of Health and Family Welfare, Department of Health and Family Welfare, Government of India. It announces the reconstitution of the Drugs Technical Advisory Board, as per the Drugs and Cosmetics Act, 1940. The notification lists the members of the newly formed board along with their official position.', 'drugs_list': ['Not specifically mentioned in the summary, but the context implies that the document pertains to the regulation of Drugs and Cosmetics in general.'], 'regulation_focus': 'The gazette notification pertains to regulatory aspects of Drug and Cosmetics Act, 1940 but doesn't specify any of manufactoring sale or importing regulation. '}","[Not specifically mentioned in the summary, but the context implies that the document pertains to the regulation of Drugs and Cosmetics in general.]"
4,5.0,Central GovernmentMinistry of Health and Family WelfareDepartment of Health and Family WelfareNot Applicable,Extra OrdinaryPart II-Section 3-Sub-Section (ii)Notification of Members of NCAHP,03-Mar-2025,CG-DL-E-03032025-261421,1.88 MB,https://egazette.gov.in/WriteReadData/2025/261421.pdf,pdfs/261421.pdf,6.0,,,


In [49]:
# Distribution of page counts; generate() skips PDFs with more than 5 pages.
df['pages'].value_counts()

pages
2.0     170
3.0      57
4.0      18
5.0      17
1.0      16
6.0       8
8.0       5
9.0       4
16.0      4
18.0      3
7.0       3
13.0      2
14.0      2
22.0      2
64.0      2
10.0      1
32.0      1
11.0      1
29.0      1
12.0      1
53.0      1
70.0      1
20.0      1
58.0      1
83.0      1
30.0      1
Name: count, dtype: int64

In [34]:
# List the column names currently in the frame.
df.columns

Index(['S. No.', 'Ministry / OrganizationDepartmentOffice',
       'Gazette CategoryPart & SectionSubject', 'Publish Date', 'Gazette ID',
       'ViewGazette', 'pdf_url', 'pdf_file'],
      dtype='object')