In [1]:
import json
import os
from datetime import date
from urllib.parse import unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup


In [2]:
# Main page ("Заглавная страница") of Russian-language Wikipedia; the next
# cell downloads it to find the picture of the day.
url = "https://ru.wikipedia.org/wiki/Заглавная_страница"

In [3]:
# Download the "picture of the day" shown on the page at `url` into a folder
# named after today's date, together with its alt-text description.

# Without a timeout requests may wait forever on a stalled connection.
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an error page

soup = BeautifulSoup(response.content, 'html.parser')

potd_element = soup.find('div', {'id': 'main-potd'})
if potd_element is None:
    raise RuntimeError("No <div id='main-potd'> on the page; its layout may have changed")

img_element = potd_element.find('img')
if img_element is None:
    raise RuntimeError("The 'main-potd' block contains no <img> element")

# Take the last srcset candidate (presumably the highest-density variant);
# fall back to `src` when srcset is absent or empty.
srcset_candidates = [
    candidate.strip()
    for candidate in img_element.get('srcset', '').split(',')
    if candidate.strip()
]
if srcset_candidates:
    raw_img_url = srcset_candidates[-1].split()[0]
else:
    raw_img_url = img_element['src']

# urljoin resolves protocol-relative ("//host/..."), root-relative ("/path")
# and already-absolute URLs alike, so both branches are handled consistently.
img_url = urljoin(url, raw_img_url)

today_dir = date.today().isoformat()
os.makedirs(today_dir, exist_ok=True)

# Name the file after the last path segment of the URL: drop any query string,
# decode percent-escapes (non-ASCII names are much shorter that way), and keep
# only the base name in case a decoded escape produced a path separator.
img_filename = os.path.basename(unquote(urlsplit(img_url).path.rsplit('/', 1)[-1]))
img_name = os.path.join(today_dir, img_filename or 'potd_image')

img_response = requests.get(img_url, timeout=30)
img_response.raise_for_status()  # do not save an error page as the image

with open(img_name, 'wb') as file:
    file.write(img_response.content)

desc_name = os.path.join(today_dir, 'description.txt')
description = img_element.get('alt', '')
# Explicit UTF-8: the platform default encoding may be unable to represent
# the (typically Cyrillic) description text.
with open(desc_name, 'w', encoding='utf-8') as file:
    file.write(description)

In [4]:
# Ask for the title of the article to fetch. The prompt makes the otherwise
# anonymous input box self-explanatory; surrounding whitespace is stripped so
# stray spaces do not end up in the API request.
query = input('Wikipedia article title: ').strip()

In [5]:
def get_article(query):
    """Fetch the parsed representation of a Russian Wikipedia page.

    Calls the MediaWiki ``action=parse`` endpoint of ru.wikipedia.org and
    returns the ``parse`` object of the JSON response.

    Args:
        query: Title of the page to look up.

    Returns:
        The value stored under the ``'parse'`` key of the API response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response has no ``'parse'`` entry, e.g. because the
            API reported an error for the requested title.
    """
    response = requests.get(
        'https://ru.wikipedia.org/w/api.php',
        # Let requests build the query string so that titles containing
        # '&', '#', '+', '?' or spaces reach the API intact.
        params={'action': 'parse', 'page': query, 'format': 'json'},
        timeout=30,
    )
    response.raise_for_status()

    data = response.json()
    if 'parse' not in data:
        # Include whatever error details the API sent instead of failing
        # with a context-free KeyError: 'parse'.
        api_error = data.get('error', {})
        raise KeyError(
            f"No 'parse' entry in API response for page {query!r}: "
            f"{api_error.get('info', api_error)}"
        )
    return data['parse']


In [6]:
# Fetch the parsed article for the entered title; later cells read its
# 'images', 'links' and 'text' entries.
article = get_article(query)

In [7]:
# Lazily build a Wikimedia Commons file-page URL for every entry of
# article['images'], then print them one per line.
commons_pages = (
    f'https://commons.wikimedia.org/wiki/File:{image}?uselang=ru'
    for image in article['images']
)
for image_url in commons_pages:
    print(image_url)

https://commons.wikimedia.org/wiki/File:Original_1T1C_DRAM_design-ru.svg?uselang=ru
https://commons.wikimedia.org/wiki/File:Square_array_of_mosfet_cells_read.png?uselang=ru
https://commons.wikimedia.org/wiki/File:Square_array_of_mosfet_cells_write.png?uselang=ru
https://commons.wikimedia.org/wiki/File:RAM_n.jpg?uselang=ru
https://commons.wikimedia.org/wiki/File:SO-DIMM_72pin.jpg?uselang=ru
https://commons.wikimedia.org/wiki/File:1GB_DDR2_SO-DIMM.png?uselang=ru
https://commons.wikimedia.org/wiki/File:Wiki_letter_w.svg?uselang=ru
https://commons.wikimedia.org/wiki/File:Wikipedia_interwiki_section_gear_icon.svg?uselang=ru


In [8]:
# Print a ru.wikipedia.org URL for every entry of article['links'];
# spaces in the linked title are replaced with underscores.
for link_entry in article['links']:
    page_title = link_entry["*"].replace(" ", "_")
    print(f'https://ru.wikipedia.org/wiki/{page_title}')

https://ru.wikipedia.org/wiki/Intel_1103
https://ru.wikipedia.org/wiki/MDRAM
https://ru.wikipedia.org/wiki/UniDIMM
https://ru.wikipedia.org/wiki/1966_год
https://ru.wikipedia.org/wiki/ADATA_Technology
https://ru.wikipedia.org/wiki/Apacer_Technology_Inc.
https://ru.wikipedia.org/wiki/BGA
https://ru.wikipedia.org/wiki/DDR2_SDRAM
https://ru.wikipedia.org/wiki/DDR3_SDRAM
https://ru.wikipedia.org/wiki/DDR4_SDRAM
https://ru.wikipedia.org/wiki/DDR5_SDRAM
https://ru.wikipedia.org/wiki/DDR_SDRAM
https://ru.wikipedia.org/wiki/DIMM
https://ru.wikipedia.org/wiki/DIP
https://ru.wikipedia.org/wiki/EDO_DRAM
https://ru.wikipedia.org/wiki/EDO_RAM
https://ru.wikipedia.org/wiki/EDRAM
https://ru.wikipedia.org/wiki/FB-DIMM
https://ru.wikipedia.org/wiki/FPM_DRAM
https://ru.wikipedia.org/wiki/FPM_RAM
https://ru.wikipedia.org/wiki/GDDR
https://ru.wikipedia.org/wiki/GDDR2
https://ru.wikipedia.org/wiki/GDDR3
https://ru.wikipedia.org/wiki/GDDR4
https://ru.wikipedia.org/wiki/GDDR5
https://ru.wikipedia.org/wiki/GD

In [9]:
# Display the article body stored under text['*'] (rendered HTML according
# to the MediaWiki parse API; the output can be very long).
article['text']['*']



In [10]:
# Wikimedia Commons file-page URL for every entry of article['images'].
image_urls = [
    f'https://commons.wikimedia.org/wiki/File:{image}?uselang=ru'
    for image in article['images']
]

# ru.wikipedia.org URL for every entry of article['links'], with spaces in
# the title replaced by underscores.
article_links = [
    f'https://ru.wikipedia.org/wiki/{link["*"].replace(" ", "_")}'
    for link in article['links']
]

# Bundle everything into a single JSON-serialisable structure.
data = {
    'images': image_urls,
    'links': article_links,
    'text': article['text']['*'],
}

# Save as indented JSON; ensure_ascii=False keeps non-ASCII characters
# readable in the file instead of \uXXXX escapes.
serialized = json.dumps(data, ensure_ascii=False, indent=4)
with open('article_data.json', 'w', encoding='utf-8') as json_file:
    json_file.write(serialized)